[14/17] incubator-sdap-mudrod git commit: SDAP-7 Change all package namespaces to org.apache.sdap

lewismc Tue, 19 Dec 2017 06:13:43 -0800

http://git-wip-us.apache.org/repos/asf/incubator-sdap-mudrod/blob/39379fa9/core/src/main/java/gov/nasa/jpl/mudrod/recommendation/pre/package-info.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/gov/nasa/jpl/mudrod/recommendation/pre/package-info.java b/core/src/main/java/gov/nasa/jpl/mudrod/recommendation/pre/package-info.java
deleted file mode 100644
index 2febf96..0000000
--- a/core/src/main/java/gov/nasa/jpl/mudrod/recommendation/pre/package-info.java
+++ /dev/null
@@ -1,17 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License"); you 
- * may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at
- * 
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-/**
- * This package includes the preprocessing required by recommendation module.
- */
-package gov.nasa.jpl.mudrod.recommendation.pre;
\ No newline at end of file


http://git-wip-us.apache.org/repos/asf/incubator-sdap-mudrod/blob/39379fa9/core/src/main/java/gov/nasa/jpl/mudrod/recommendation/process/AbstractBasedSimilarity.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/gov/nasa/jpl/mudrod/recommendation/process/AbstractBasedSimilarity.java b/core/src/main/java/gov/nasa/jpl/mudrod/recommendation/process/AbstractBasedSimilarity.java
deleted file mode 100644
index b0e93fc..0000000
--- a/core/src/main/java/gov/nasa/jpl/mudrod/recommendation/process/AbstractBasedSimilarity.java
+++ /dev/null
@@ -1,74 +0,0 @@
-/**
- * Project Name:mudrod-core
- * File Name:TopicBasedCF.java
- * Package Name:gov.nasa.jpl.mudrod.recommendation.process
- * Date:Aug 22, 201610:45:55 AM
- * Copyright (c) 2016, [email protected] All Rights Reserved.
- */
-
-package gov.nasa.jpl.mudrod.recommendation.process;
-
-import gov.nasa.jpl.mudrod.discoveryengine.DiscoveryStepAbstract;
-import gov.nasa.jpl.mudrod.driver.ESDriver;
-import gov.nasa.jpl.mudrod.driver.SparkDriver;
-import gov.nasa.jpl.mudrod.semantics.SVDAnalyzer;
-import gov.nasa.jpl.mudrod.utils.LinkageTriple;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.util.List;
-import java.util.Properties;
-
-/**
- * ClassName: Recommend metedata based on data content semantic similarity
- */
-public class AbstractBasedSimilarity extends DiscoveryStepAbstract {
-
-  private static final Logger LOG = LoggerFactory.getLogger(AbstractBasedSimilarity.class);
-
-  /**
-   * Creates a new instance of TopicBasedCF.
-   *
-   * @param props the Mudrod configuration
-   * @param es    the Elasticsearch client
-   * @param spark the spark drive
-   */
-  public AbstractBasedSimilarity(Properties props, ESDriver es, SparkDriver spark) {
-    super(props, es, spark);
-  }
-
-  @Override
-  public Object execute() {
-
-    LOG.info("*****************abstract similarity calculation starts******************");
-    startTime = System.currentTimeMillis();
-
-    try {
-      /*String topicMatrixFile = props.getProperty("metadata_term_tfidf_matrix");
-      SemanticAnalyzer analyzer = new SemanticAnalyzer(props, es, spark);
-      List<LinkageTriple> triples = analyzer
-          .calTermSimfromMatrix(topicMatrixFile);
-      analyzer.saveToES(triples, props.getProperty("indexName"),
-          props.getProperty("metadataTermTFIDFSimType"), true, true);*/
-
-      // for comparison
-      SVDAnalyzer svd = new SVDAnalyzer(props, es, spark);
-      svd.getSVDMatrix(props.getProperty("metadata_word_tfidf_matrix"), 150, props.getProperty("metadata_word_tfidf_matrix"));
-      List<LinkageTriple> tripleList = svd.calTermSimfromMatrix(props.getProperty("metadata_word_tfidf_matrix"));
-      svd.saveToES(tripleList, props.getProperty("indexName"), props.getProperty("metadataWordTFIDFSimType"), true, true);
-
-    } catch (Exception e) {
-      e.printStackTrace();
-    }
-
-    endTime = System.currentTimeMillis();
-    LOG.info("*****************abstract similarity calculation ends******************Took {}s", (endTime - startTime) / 1000);
-
-    return null;
-  }
-
-  @Override
-  public Object execute(Object o) {
-    return null;
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-sdap-mudrod/blob/39379fa9/core/src/main/java/gov/nasa/jpl/mudrod/recommendation/process/VariableBasedSimilarity.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/gov/nasa/jpl/mudrod/recommendation/process/VariableBasedSimilarity.java b/core/src/main/java/gov/nasa/jpl/mudrod/recommendation/process/VariableBasedSimilarity.java
deleted file mode 100644
index 67aeeb8..0000000
--- a/core/src/main/java/gov/nasa/jpl/mudrod/recommendation/process/VariableBasedSimilarity.java
+++ /dev/null
@@ -1,380 +0,0 @@
-package gov.nasa.jpl.mudrod.recommendation.process;
-
-import gov.nasa.jpl.mudrod.discoveryengine.DiscoveryStepAbstract;
-import gov.nasa.jpl.mudrod.driver.ESDriver;
-import gov.nasa.jpl.mudrod.driver.SparkDriver;
-import org.elasticsearch.action.index.IndexRequest;
-import org.elasticsearch.action.search.SearchResponse;
-import org.elasticsearch.action.update.UpdateRequest;
-import org.elasticsearch.common.unit.TimeValue;
-import org.elasticsearch.common.xcontent.XContentBuilder;
-import org.elasticsearch.index.query.QueryBuilders;
-import org.elasticsearch.search.SearchHit;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.IOException;
-import java.io.Serializable;
-import java.text.DecimalFormat;
-import java.util.*;
-
-import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
-
-public class VariableBasedSimilarity extends DiscoveryStepAbstract implements Serializable {
-
-  /**
-   *
-   */
-  private static final long serialVersionUID = 1L;
-
-  private static final Logger LOG = LoggerFactory.getLogger(VariableBasedSimilarity.class);
-
-  private DecimalFormat df = new DecimalFormat("#.000");
-  // a map from variable to its type
-  public Map<String, Integer> variableTypes;
-  public Map<String, Integer> variableWeights;
-
-  private static final Integer VAR_SPATIAL = 1;
-  private static final Integer VAR_TEMPORAL = 2;
-  private static final Integer VAR_CATEGORICAL = 3;
-  private static final Integer VAR_ORDINAL = 4;
-
-  // index name
-  private String indexName;
-  // type name of metadata in ES
-  private String metadataType;
-  private String variableSimType;
-
-  /**
-   * Creates a new instance of OHEncoder.
-   *
-   * @param props the Mudrod configuration
-   * @param es    an instantiated {@link ESDriver}
-   * @param spark an instantiated {@link SparkDriver}
-   */
-  public VariableBasedSimilarity(Properties props, ESDriver es, SparkDriver spark) {
-    super(props, es, spark);
-
-    indexName = props.getProperty("indexName");
-    metadataType = props.getProperty("recom_metadataType");
-    variableSimType = props.getProperty("metadataCodeSimType");
-    this.inital();
-  }
-
-  @Override
-  public Object execute() {
-    LOG.info("*****************calculating metadata variables based similarity starts******************");
-    startTime = System.currentTimeMillis();
-    es.deleteType(indexName, variableSimType);
-    addMapping(es, indexName, variableSimType);
-
-    VariableBasedSimilarity(es);
-    es.refreshIndex();
-    normalizeVariableWeight(es);
-    es.refreshIndex();
-    endTime = System.currentTimeMillis();
-    LOG.info("*****************calculating metadata variables based similarity ends******************Took {}s", (endTime - startTime) / 1000);
-    return null;
-  }
-
-  @Override
-  public Object execute(Object o) {
-    return null;
-  }
-
-  public void inital() {
-    this.initVariableType();
-    this.initVariableWeight();
-  }
-
-  private void initVariableType() {
-    variableTypes = new HashMap<>();
-
-    variableTypes.put("DatasetParameter-Variable", VAR_CATEGORICAL);
-    variableTypes.put("DatasetRegion-Region", VAR_CATEGORICAL);
-    variableTypes.put("Dataset-ProjectionType", VAR_CATEGORICAL);
-    variableTypes.put("Dataset-ProcessingLevel", VAR_CATEGORICAL);
-    variableTypes.put("DatasetParameter-Topic", VAR_CATEGORICAL);
-    variableTypes.put("DatasetParameter-Term", VAR_CATEGORICAL);
-    variableTypes.put("DatasetParameter-Category", VAR_CATEGORICAL);
-    variableTypes.put("DatasetPolicy-DataFormat", VAR_CATEGORICAL);
-    variableTypes.put("Collection-ShortName", VAR_CATEGORICAL);
-    variableTypes.put("DatasetSource-Source-Type", VAR_CATEGORICAL);
-    variableTypes.put("DatasetSource-Source-ShortName", VAR_CATEGORICAL);
-    variableTypes.put("DatasetSource-Sensor-ShortName", VAR_CATEGORICAL);
-    variableTypes.put("DatasetPolicy-Availability", VAR_CATEGORICAL);
-    variableTypes.put("Dataset-Provider-ShortName", VAR_CATEGORICAL);
-
-    variableTypes.put("Dataset-Derivative-ProcessingLevel", VAR_ORDINAL);
-    variableTypes.put("Dataset-Derivative-TemporalResolution", VAR_ORDINAL);
-    variableTypes.put("Dataset-Derivative-SpatialResolution", VAR_ORDINAL);
-  }
-
-  private void initVariableWeight() {
-    variableWeights = new HashMap<>();
-
-    variableWeights.put("Dataset-Derivative-ProcessingLevel", 5);
-    variableWeights.put("DatasetParameter-Category", 5);
-    variableWeights.put("DatasetParameter-Variable", 5);
-    variableWeights.put("DatasetSource-Sensor-ShortName", 5);
-
-    variableWeights.put("DatasetPolicy-Availability", 4);
-    variableWeights.put("DatasetRegion-Region", 4);
-    variableWeights.put("DatasetSource-Source-Type", 4);
-    variableWeights.put("DatasetSource-Source-ShortName", 4);
-    variableWeights.put("DatasetParameter-Term", 4);
-    variableWeights.put("DatasetPolicy-DataFormat", 4);
-    variableWeights.put("Dataset-Derivative-SpatialResolution", 4);
-    variableWeights.put("Temporal_Covergae", 4);
-
-    variableWeights.put("DatasetParameter-Topic", 3);
-    variableWeights.put("Collection-ShortName", 3);
-    variableWeights.put("Dataset-Derivative-TemporalResolution", 3);
-    variableWeights.put("Spatial_Covergae", 3);
-
-    variableWeights.put("Dataset-ProjectionType", 1);
-    variableWeights.put("Dataset-Provider-ShortName", 1);
-  }
-
-  public void VariableBasedSimilarity(ESDriver es) {
-
-    es.createBulkProcessor();
-
-    List<Map<String, Object>> metadatas = new ArrayList<>();
-    SearchResponse scrollResp = es.getClient().prepareSearch(indexName).setTypes(metadataType).setScroll(new TimeValue(60000)).setQuery(QueryBuilders.matchAllQuery()).setSize(100).execute()
-        .actionGet();
-    while (true) {
-      for (SearchHit hit : scrollResp.getHits().getHits()) {
-        Map<String, Object> metadataA = hit.getSource();
-        metadatas.add(metadataA);
-      }
-
-      scrollResp = es.getClient().prepareSearchScroll(scrollResp.getScrollId()).setScroll(new TimeValue(600000)).execute().actionGet();
-      if (scrollResp.getHits().getHits().length == 0) {
-        break;
-      }
-    }
-
-    int size = metadatas.size();
-    for (int i = 0; i < size; i++) {
-      Map<String, Object> metadataA = metadatas.get(i);
-      String shortNameA = (String) metadataA.get("Dataset-ShortName");
-
-      for (int j = 0; j < size; j++) {
-        metadataA = metadatas.get(i);
-        Map<String, Object> metadataB = metadatas.get(j);
-        String shortNameB = (String) metadataB.get("Dataset-ShortName");
-
-        try {
-          XContentBuilder contentBuilder = jsonBuilder().startObject();
-          contentBuilder.field("concept_A", shortNameA);
-          contentBuilder.field("concept_B", shortNameB);
-
-          // spatial similarity
-          this.spatialSimilarity(metadataA, metadataB, contentBuilder);
-          // temporal similarity
-          this.temporalSimilarity(metadataA, metadataB, contentBuilder);
-          // categorical variables similarity
-          this.categoricalVariablesSimilarity(metadataA, metadataB, contentBuilder);
-          // ordinal variables similarity
-          this.ordinalVariablesSimilarity(metadataA, metadataB, contentBuilder);
-
-          contentBuilder.endObject();
-
-          IndexRequest ir = new IndexRequest(indexName, variableSimType).source(contentBuilder);
-          es.getBulkProcessor().add(ir);
-
-        } catch (IOException e1) {
-          e1.printStackTrace();
-        }
-
-      }
-    }
-
-    es.destroyBulkProcessor();
-  }
-
-  /*
-   * refer to P. Frontiera, R. Larson, and J. Radke (2008) A comparison of
-     geometric approaches to assessing spatial similarity for GIR.
-     International Journal of Geographical Information Science,
-     22(3)
-   */
-  public void spatialSimilarity(Map<String, Object> metadataA, Map<String, Object> metadataB, XContentBuilder contentBuilder) throws IOException {
-
-    double topA = (double) metadataA.get("DatasetCoverage-Derivative-NorthLat");
-    double bottomA = (double) metadataA.get("DatasetCoverage-Derivative-SouthLat");
-    double leftA = (double) metadataA.get("DatasetCoverage-Derivative-WestLon");
-    double rightA = (double) metadataA.get("DatasetCoverage-Derivative-EastLon");
-    double areaA = (double) metadataA.get("DatasetCoverage-Derivative-Area");
-
-    double topB = (double) metadataB.get("DatasetCoverage-Derivative-NorthLat");
-    double bottomB = (double) metadataB.get("DatasetCoverage-Derivative-SouthLat");
-    double leftB = (double) metadataB.get("DatasetCoverage-Derivative-WestLon");
-    double rightB = (double) metadataB.get("DatasetCoverage-Derivative-EastLon");
-    double areaB = (double) metadataB.get("DatasetCoverage-Derivative-Area");
-
-    // Intersect area
-    double xOverlap = Math.max(0, Math.min(rightA, rightB) - Math.max(leftA, leftB));
-    double yOverlap = Math.max(0, Math.min(topA, topB) - Math.max(bottomA, bottomB));
-    double overlapArea = xOverlap * yOverlap;
-
-    // Calculate coverage similarity
-    double similarity = 0.0;
-    if (areaA > 0 && areaB > 0) {
-      similarity = (overlapArea / areaA + overlapArea / areaB) * 0.5;
-    }
-
-    contentBuilder.field("Spatial_Covergae_Sim", similarity);
-  }
-
-  public void temporalSimilarity(Map<String, Object> metadataA, Map<String, Object> metadataB, XContentBuilder contentBuilder) throws IOException {
-
-    double similarity = 0.0;
-    double startTimeA = Double.parseDouble((String) metadataA.get("Dataset-DatasetCoverage-StartTimeLong"));
-    String endTimeAStr = (String) metadataA.get("Dataset-DatasetCoverage-StopTimeLong");
-    double endTimeA = 0.0;
-    if ("".equals(endTimeAStr)) {
-      endTimeA = System.currentTimeMillis();
-    } else {
-      endTimeA = Double.parseDouble(endTimeAStr);
-    }
-    double timespanA = endTimeA - startTimeA;
-
-    double startTimeB = Double.parseDouble((String) metadataB.get("Dataset-DatasetCoverage-StartTimeLong"));
-    String endTimeBStr = (String) metadataB.get("Dataset-DatasetCoverage-StopTimeLong");
-    double endTimeB = 0.0;
-    if ("".equals(endTimeBStr)) {
-      endTimeB = System.currentTimeMillis();
-    } else {
-      endTimeB = Double.parseDouble(endTimeBStr);
-    }
-    double timespanB = endTimeB - startTimeB;
-
-    double intersect = 0.0;
-    if (startTimeB >= endTimeA || endTimeB <= startTimeA) {
-      intersect = 0.0;
-    } else if (startTimeB >= startTimeA && endTimeB <= endTimeA) {
-      intersect = timespanB;
-    } else if (startTimeA >= startTimeB && endTimeA <= endTimeB) {
-      intersect = timespanA;
-    } else {
-      intersect = (startTimeA > startTimeB) ? (endTimeB - startTimeA) : (endTimeA - startTimeB);
-    }
-
-    similarity = intersect / (Math.sqrt(timespanA) * Math.sqrt(timespanB));
-    contentBuilder.field("Temporal_Covergae_Sim", similarity);
-  }
-
-  public void categoricalVariablesSimilarity(Map<String, Object> metadataA, Map<String, Object> metadataB, XContentBuilder contentBuilder) throws IOException {
-
-    for (String variable : variableTypes.keySet()) {
-      Integer type = variableTypes.get(variable);
-      if (type != VAR_CATEGORICAL) {
-        continue;
-      }
-
-      double similarity = 0.0;
-      Object valueA = metadataA.get(variable);
-      Object valueB = metadataB.get(variable);
-      if (valueA instanceof ArrayList) {
-        ArrayList<String> aList = (ArrayList<String>) valueA;
-        ArrayList<String> bList = (ArrayList<String>) valueB;
-        if (aList != null && bList != null) {
-
-          int lengthA = aList.size();
-          int lengthB = bList.size();
-          List<String> newAList = new ArrayList<>(aList);
-          List<String> newBList = new ArrayList<>(bList);
-          newAList.retainAll(newBList);
-          similarity = newAList.size() / lengthA;
-        }
-
-      } else if (valueA instanceof String) {
-        if (valueA.equals(valueB)) {
-          similarity = 1.0;
-        }
-      }
-
-      contentBuilder.field(variable + "_Sim", similarity);
-    }
-  }
-
-  public void ordinalVariablesSimilarity(Map<String, Object> metadataA, Map<String, Object> metadataB, XContentBuilder contentBuilder) throws IOException {
-    for (String variable : variableTypes.keySet()) {
-      Integer type = variableTypes.get(variable);
-      if (type != VAR_ORDINAL) {
-        continue;
-      }
-
-      double similarity = 0.0;
-      Object valueA = metadataA.get(variable);
-      Object valueB = metadataB.get(variable);
-      if (valueA != null && valueB != null) {
-
-        double a = (double) valueA;
-        double b = (double) valueB;
-        if (a != 0.0) {
-          similarity = 1 - Math.abs(b - a) / a;
-          if (similarity < 0) {
-            similarity = 0.0;
-          }
-        }
-      }
-
-      contentBuilder.field(variable + "_Sim", similarity);
-    }
-  }
-
-  public static void addMapping(ESDriver es, String index, String type) {
-    XContentBuilder Mapping;
-    try {
-      Mapping = jsonBuilder().startObject().startObject(type).startObject("properties").startObject("concept_A").field("type", "string").field("index", "not_analyzed").endObject()
-          .startObject("concept_B").field("type", "string").field("index", "not_analyzed").endObject()
-
-          .endObject().endObject().endObject();
-
-      es.getClient().admin().indices().preparePutMapping(index).setType(type).setSource(Mapping).execute().actionGet();
-    } catch (IOException e) {
-      e.printStackTrace();
-    }
-  }
-
-  public void normalizeVariableWeight(ESDriver es) {
-
-    es.createBulkProcessor();
-
-    double totalWeight = 0.0;
-    for (String variable : variableWeights.keySet()) {
-      totalWeight += variableWeights.get(variable);
-    }
-
-    SearchResponse scrollResp = es.getClient().prepareSearch(indexName).setTypes(variableSimType).setScroll(new TimeValue(60000)).setQuery(QueryBuilders.matchAllQuery()).setSize(100).execute()
-        .actionGet();
-    while (true) {
-      for (SearchHit hit : scrollResp.getHits().getHits()) {
-        Map<String, Object> similarities = hit.getSource();
-
-        double totalSim = 0.0;
-        for (String variable : variableWeights.keySet()) {
-          if (similarities.containsKey(variable + "_Sim")) {
-            double value = (double) similarities.get(variable + "_Sim");
-            double weight = variableWeights.get(variable);
-            totalSim += weight * value;
-          }
-        }
-
-        double weight = totalSim / totalWeight;
-        UpdateRequest ur = es.generateUpdateRequest(indexName, variableSimType, hit.getId(), "weight", weight);
-        es.getBulkProcessor().add(ur);
-      }
-
-      scrollResp = es.getClient().prepareSearchScroll(scrollResp.getScrollId()).setScroll(new TimeValue(600000)).execute().actionGet();
-      if (scrollResp.getHits().getHits().length == 0) {
-        break;
-      }
-    }
-
-    es.destroyBulkProcessor();
-  }
-}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-sdap-mudrod/blob/39379fa9/core/src/main/java/gov/nasa/jpl/mudrod/recommendation/process/package-info.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/gov/nasa/jpl/mudrod/recommendation/process/package-info.java b/core/src/main/java/gov/nasa/jpl/mudrod/recommendation/process/package-info.java
deleted file mode 100644
index 84231f7..0000000
--- a/core/src/main/java/gov/nasa/jpl/mudrod/recommendation/process/package-info.java
+++ /dev/null
@@ -1,17 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License"); you 
- * may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at
- * 
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-/**
- * This package includes the processing required by recommendation module.
- */
-package gov.nasa.jpl.mudrod.recommendation.process;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-sdap-mudrod/blob/39379fa9/core/src/main/java/gov/nasa/jpl/mudrod/recommendation/process/sessionBasedCF.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/gov/nasa/jpl/mudrod/recommendation/process/sessionBasedCF.java b/core/src/main/java/gov/nasa/jpl/mudrod/recommendation/process/sessionBasedCF.java
deleted file mode 100644
index ae55769..0000000
--- a/core/src/main/java/gov/nasa/jpl/mudrod/recommendation/process/sessionBasedCF.java
+++ /dev/null
@@ -1,74 +0,0 @@
-/**
- * Project Name:mudrod-core
- * File Name:sessionBasedCF.java
- * Package Name:gov.nasa.jpl.mudrod.recommendation.process
- * Date:Aug 19, 20163:17:00 PM
- * Copyright (c) 2016, [email protected] All Rights Reserved.
- */
-
-package gov.nasa.jpl.mudrod.recommendation.process;
-
-import gov.nasa.jpl.mudrod.discoveryengine.DiscoveryStepAbstract;
-import gov.nasa.jpl.mudrod.driver.ESDriver;
-import gov.nasa.jpl.mudrod.driver.SparkDriver;
-import gov.nasa.jpl.mudrod.semantics.SemanticAnalyzer;
-import gov.nasa.jpl.mudrod.utils.LinkageTriple;
-import gov.nasa.jpl.mudrod.utils.SimilarityUtil;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.File;
-import java.util.List;
-import java.util.Properties;
-
-/**
- * ClassName: Recommend metedata based on session level co-occurrence
- */
-public class sessionBasedCF extends DiscoveryStepAbstract {
-
-  private static final Logger LOG = LoggerFactory.getLogger(sessionBasedCF.class);
-
-  /**
-   * Creates a new instance of sessionBasedCF.
-   *
-   * @param props
-   *          the Mudrod configuration
-   * @param es
-   *          the Elasticsearch drive
-   * @param spark
-   *          the spark drive
-   */
-  public sessionBasedCF(Properties props, ESDriver es, SparkDriver spark) {
-    super(props, es, spark);
-  }
-
-  @Override
-  public Object execute() {
-    LOG.info("*****************Session based metadata similarity starts******************");
-    startTime = System.currentTimeMillis();
-
-    try {
-      String session_metadatFile = props.getProperty("session_metadata_Matrix");
-      File f = new File(session_metadatFile);
-      if (f.exists()) {
-        SemanticAnalyzer analyzer = new SemanticAnalyzer(props, es, spark);
-        List<LinkageTriple> triples = analyzer.calTermSimfromMatrix(session_metadatFile, SimilarityUtil.SIM_PEARSON, 1);
-        analyzer.saveToES(triples, props.getProperty("indexName"), props.getProperty("metadataSessionBasedSimType"), true, false);
-      }
-
-    } catch (Exception e) {
-      e.printStackTrace();
-    }
-
-    endTime = System.currentTimeMillis();
-    LOG.info("*****************Session based metadata similarity ends******************Took {}s", (endTime - startTime) / 1000);
-
-    return null;
-  }
-
-  @Override
-  public Object execute(Object o) {
-    return null;
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-sdap-mudrod/blob/39379fa9/core/src/main/java/gov/nasa/jpl/mudrod/recommendation/structure/HybridRecommendation.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/gov/nasa/jpl/mudrod/recommendation/structure/HybridRecommendation.java b/core/src/main/java/gov/nasa/jpl/mudrod/recommendation/structure/HybridRecommendation.java
deleted file mode 100644
index 4163fda..0000000
--- a/core/src/main/java/gov/nasa/jpl/mudrod/recommendation/structure/HybridRecommendation.java
+++ /dev/null
@@ -1,274 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License"); you
- * may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package gov.nasa.jpl.mudrod.recommendation.structure;
-
-import com.google.gson.Gson;
-import com.google.gson.JsonElement;
-import com.google.gson.JsonObject;
-import gov.nasa.jpl.mudrod.discoveryengine.DiscoveryStepAbstract;
-import gov.nasa.jpl.mudrod.driver.ESDriver;
-import gov.nasa.jpl.mudrod.driver.SparkDriver;
-import gov.nasa.jpl.mudrod.main.MudrodEngine;
-import org.elasticsearch.action.search.SearchRequestBuilder;
-import org.elasticsearch.action.search.SearchResponse;
-import org.elasticsearch.index.query.QueryBuilders;
-import org.elasticsearch.search.SearchHit;
-import org.elasticsearch.search.sort.SortOrder;
-
-import java.io.IOException;
-import java.text.DecimalFormat;
-import java.util.*;
-
-/**
- * Recommend metadata using combination all two methods, including content-based
- * similarity and session-level similarity
- */
-public class HybridRecommendation extends DiscoveryStepAbstract {
-  /**
-   *
-   */
-  private static final long serialVersionUID = 1L;
-  // recommended metadata list
-  protected transient List<LinkedTerm> termList = new ArrayList<>();
-  // format decimal
-  DecimalFormat df = new DecimalFormat("#.00");
-  // index name
-  protected static final String INDEX_NAME = "indexName";
-  private static final String WEIGHT = "weight";
-
-  /**
-   * recommended data class Date: Sep 12, 2016 2:25:28 AM
-   */
-  class LinkedTerm {
-    public String term = null;
-    public double weight = 0;
-    public String model = null;
-
-    public LinkedTerm(String str, double w, String m) {
-      term = str;
-      weight = w;
-      model = m;
-    }
-  }
-
-  public HybridRecommendation(Properties props, ESDriver es, SparkDriver spark) {
-    super(props, es, spark);
-  }
-
-  @Override
-  public Object execute() {
-    return null;
-  }
-
-  @Override
-  public Object execute(Object o) {
-    return null;
-  }
-
-  /**
-   * Get recommended data for a giving dataset
-   *
-   * @param input: a giving dataset
-   * @param num:   the number of recommended dataset
-   * @return recommended dataset in json format
-   */
-  public JsonObject getRecomDataInJson(String input, int num) {
-    JsonObject resultJson = new JsonObject();
-
-    String type = props.getProperty("metadataCodeSimType");
-    Map<String, Double> sortedVariableSimMap = getRelatedData(type, input, num + 10);
-
-    type = props.getProperty("metadataWordTFIDFSimType");
-    Map<String, Double> sortedAbstractSimMap = getRelatedData(type, input, num + 10);
-
-    type = props.getProperty("metadataSessionBasedSimType");
-    Map<String, Double> sortedSessionSimMap = getRelatedData(type, input, num + 10);
-
-    JsonElement variableSimJson = mapToJson(sortedVariableSimMap, num);
-    resultJson.add("variableSim", variableSimJson);
-    JsonElement abstractSimJson = mapToJson(sortedAbstractSimMap, num);
-    resultJson.add("abstractSim", abstractSimJson);
-    JsonElement sessionSimJson = mapToJson(sortedSessionSimMap, num);
-    resultJson.add("sessionSim", sessionSimJson);
-
-    Map<String, Double> hybirdSimMap = new HashMap<String, Double>();
-
-    for (String name : sortedAbstractSimMap.keySet()) {
-      hybirdSimMap.put(name, sortedAbstractSimMap.get(name) /** 0.4 */);
-    }
-
-    for (String name : sortedVariableSimMap.keySet()) {
-      if (hybirdSimMap.get(name) != null) {
-        double sim = hybirdSimMap.get(name) + sortedVariableSimMap.get(name) /** 0.3 */;
-        hybirdSimMap.put(name, Double.parseDouble(df.format(sim)));
-      } else {
-        double sim = sortedVariableSimMap.get(name);
-        hybirdSimMap.put(name, Double.parseDouble(df.format(sim)));
-      }
-    }
-
-    for (String name : sortedSessionSimMap.keySet()) {
-      if (hybirdSimMap.get(name) != null) {
-        double sim = hybirdSimMap.get(name) + sortedSessionSimMap.get(name) /** 0.1 */;
-        hybirdSimMap.put(name, Double.parseDouble(df.format(sim)));
-      } else {
-        double sim = sortedSessionSimMap.get(name);
-        hybirdSimMap.put(name, Double.parseDouble(df.format(sim)));
-      }
-    }
-
-    Map<String, Double> sortedHybirdSimMap = this.sortMapByValue(hybirdSimMap);
-
-    JsonElement linkedJson = mapToJson(sortedHybirdSimMap, num);
-    resultJson.add("linked", linkedJson);
-
-    return resultJson;
-  }
-
-  /**
-   * Method of converting hashmap to JSON
-   *
-   * @param wordweights a map from related metadata to weights
-   * @param num         the number of converted elements
-   * @return converted JSON object
-   */
-  protected JsonElement mapToJson(Map<String, Double> wordweights, int num) {
-    Gson gson = new Gson();
-
-    List<JsonObject> nodes = new ArrayList<>();
-    Set<String> words = wordweights.keySet();
-    int i = 0;
-    for (String wordB : words) {
-      JsonObject node = new JsonObject();
-      node.addProperty("name", wordB);
-      node.addProperty("weight", wordweights.get(wordB));
-      nodes.add(node);
-
-      i += 1;
-      if (i >= num) {
-        break;
-      }
-    }
-
-    String nodesJson = gson.toJson(nodes);
-    JsonElement nodesElement = gson.fromJson(nodesJson, JsonElement.class);
-
-    return nodesElement;
-  }
-
-  /**
-   * Get recommend dataset for a giving dataset
-   *
-   * @param type  recommend method
-   * @param input a giving dataset
-   * @param num   the number of recommended dataset
-   * @return recommended dataset map, key is dataset name, value is similarity
-   * value
-   */
-  public Map<String, Double> getRelatedData(String type, String input, int num) {
-    termList = new ArrayList<>();
-    Map<String, Double> termsMap = new HashMap<>();
-    Map<String, Double> sortedMap = new HashMap<>();
-    try {
-      List<LinkedTerm> links = getRelatedDataFromES(type, input, num);
-      int size = links.size();
-      for (int i = 0; i < size; i++) {
-        termsMap.put(links.get(i).term, links.get(i).weight);
-      }
-
-      sortedMap = sortMapByValue(termsMap); // terms_map will be empty
-    } catch (Exception e) {
-      e.printStackTrace();
-    }
-
-    return sortedMap;
-  }
-
-  /**
-   * Get recommend dataset for a giving dataset
-   *
-   * @param type  recommend method
-   * @param input a giving dataset
-   * @param num   the number of recommended dataset
-   * @return recommended dataset list
-   */
-  public List<LinkedTerm> getRelatedDataFromES(String type, String input, int num) {
-
-    SearchRequestBuilder builder = es.getClient().prepareSearch(props.getProperty(INDEX_NAME)).setTypes(type).setQuery(QueryBuilders.termQuery("concept_A", input)).addSort(WEIGHT, SortOrder.DESC)
-        .setSize(num);
-
-    SearchResponse usrhis = builder.execute().actionGet();
-
-    for (SearchHit hit : usrhis.getHits().getHits()) {
-      Map<String, Object> result = hit.getSource();
-      String conceptB = (String) result.get("concept_B");
-
-      if (!conceptB.equals(input)) {
-        LinkedTerm lTerm = new LinkedTerm(conceptB, (double) result.get(WEIGHT), type);
-        termList.add(lTerm);
-      }
-    }
-
-    return termList;
-  }
-
-  /**
-   * Method of sorting a map by value
-   *
-   * @param passedMap input map
-   * @return sorted map
-   */
-  public Map<String, Double> sortMapByValue(Map<String, Double> passedMap) {
-    List<String> mapKeys = new ArrayList<>(passedMap.keySet());
-    List<Double> mapValues = new ArrayList<>(passedMap.values());
-    Collections.sort(mapValues, Collections.reverseOrder());
-    Collections.sort(mapKeys, Collections.reverseOrder());
-
-    LinkedHashMap<String, Double> sortedMap = new LinkedHashMap<>();
-
-    Iterator<Double> valueIt = mapValues.iterator();
-    while (valueIt.hasNext()) {
-      Object val = valueIt.next();
-      Iterator<String> keyIt = mapKeys.iterator();
-
-      while (keyIt.hasNext()) {
-        Object key = keyIt.next();
-        String comp1 = passedMap.get(key).toString();
-        String comp2 = val.toString();
-
-        if (comp1.equals(comp2)) {
-          passedMap.remove(key);
-          mapKeys.remove(key);
-          sortedMap.put((String) key, (Double) val);
-          break;
-        }
-      }
-    }
-    return sortedMap;
-  }
-
-  public static void main(String[] args) throws IOException {
-
-    MudrodEngine me = new MudrodEngine();
-    Properties props = me.loadConfig();
-    ESDriver es = new ESDriver(me.getConfig());
-    HybridRecommendation test = new HybridRecommendation(props, es, null);
-
-    // String input = "NSCAT_LEVEL_1.7_V2";
-    String input = "AQUARIUS_L3_SSS_SMIA_MONTHLY-CLIMATOLOGY_V4";
-    JsonObject json = test.getRecomDataInJson(input, 10);
-
-    System.out.println(json.toString());
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-sdap-mudrod/blob/39379fa9/core/src/main/java/gov/nasa/jpl/mudrod/recommendation/structure/MetadataOpt.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/gov/nasa/jpl/mudrod/recommendation/structure/MetadataOpt.java b/core/src/main/java/gov/nasa/jpl/mudrod/recommendation/structure/MetadataOpt.java
deleted file mode 100644
index 69dc878..0000000
--- a/core/src/main/java/gov/nasa/jpl/mudrod/recommendation/structure/MetadataOpt.java
+++ /dev/null
@@ -1,150 +0,0 @@
-package gov.nasa.jpl.mudrod.recommendation.structure;
-
-import gov.nasa.jpl.mudrod.driver.ESDriver;
-import gov.nasa.jpl.mudrod.driver.SparkDriver;
-import gov.nasa.jpl.mudrod.main.MudrodConstants;
-import gov.nasa.jpl.mudrod.utils.LabeledRowMatrix;
-import gov.nasa.jpl.mudrod.utils.MatrixUtil;
-import org.apache.spark.api.java.JavaPairRDD;
-import org.apache.spark.api.java.JavaRDD;
-import org.apache.spark.api.java.function.PairFunction;
-import org.apache.spark.mllib.linalg.distributed.RowMatrix;
-import org.elasticsearch.action.search.SearchResponse;
-import org.elasticsearch.common.unit.TimeValue;
-import org.elasticsearch.index.query.QueryBuilders;
-import org.elasticsearch.search.SearchHit;
-import scala.Tuple2;
-
-import java.io.Serializable;
-import java.util.*;
-
-public class MetadataOpt implements Serializable {
-
-  /**
-   * 
-   */
-  private static final long serialVersionUID = 1L;
-  private String indexName;
-  private String metadataType;
-  private List<String> variables;
-
-  public static final String SPLIT_BLANK = " ";
-  public static final String SPLIT_COMMA = ",";
-
-  public MetadataOpt(Properties props) {
-    indexName = props.getProperty(MudrodConstants.ES_INDEX_NAME);
-    metadataType = props.getProperty("recom_metadataType");
-
-    variables = new ArrayList<>();
-    variables.add("DatasetParameter-Term");
-    variables.add("DatasetParameter-Variable");
-    variables.add("Dataset-Description");
-    variables.add("Dataset-LongName");
-  }
-
-  public JavaPairRDD<String, String> loadAll(ESDriver es, SparkDriver spark) 
throws Exception {
-    List<Tuple2<String, String>> datasetsTokens = this.loadMetadataFromES(es, 
variables);
-    return this.parallizeData(spark, datasetsTokens);
-  }
-
-  public JavaPairRDD<String, String> loadAll(ESDriver es, SparkDriver spark, 
List<String> variables) throws Exception {
-    List<Tuple2<String, String>> datasetsTokens = this.loadMetadataFromES(es, 
variables);
-    return this.parallizeData(spark, datasetsTokens);
-  }
-
-  private JavaPairRDD<String, String> parallizeData(SparkDriver spark, 
List<Tuple2<String, String>> datasetContent) {
-
-    JavaRDD<Tuple2<String, String>> datasetContentRDD = 
spark.sc.parallelize(datasetContent);
-
-    return datasetContentRDD.mapToPair(new PairFunction<Tuple2<String, 
String>, String, String>() {
-      /**
-       * 
-       */
-      private static final long serialVersionUID = 1L;
-
-      @Override
-      public Tuple2<String, String> call(Tuple2<String, String> term) throws 
Exception {
-        return term;
-      }
-    });
-
-  }
-
-  public JavaPairRDD<String, List<String>> tokenizeData(JavaPairRDD<String, 
String> datasetsContentRDD, String splitter) throws Exception {
-
-    return datasetsContentRDD.mapToPair(new PairFunction<Tuple2<String, 
String>, String, List<String>>() {
-      /**
-       * 
-       */
-      private static final long serialVersionUID = 1L;
-
-      @Override
-      public Tuple2<String, List<String>> call(Tuple2<String, String> arg) 
throws Exception {
-        String content = arg._2;
-        List<String> tokens = getTokens(content, splitter);
-
-        return new Tuple2<>(arg._1, tokens);
-      }
-    });
-
-  }
-
-  public List<String> getTokens(String str, String splitter) throws Exception {
-    String[] tokens = null;
-    if (splitter.equals(SPLIT_BLANK)) {
-      tokens = str.split(" ");
-    } else if (splitter.equals(SPLIT_COMMA)) {
-      tokens = str.split(",");
-    }
-    return java.util.Arrays.asList(tokens);
-  }
-
-  public List<Tuple2<String, String>> loadMetadataFromES(ESDriver es, 
List<String> variables) throws Exception {
-
-    SearchResponse scrollResp = 
es.getClient().prepareSearch(indexName).setTypes(metadataType).setQuery(QueryBuilders.matchAllQuery()).setScroll(new
 TimeValue(60000)).setSize(100).execute()
-        .actionGet();
-
-    List<Tuple2<String, String>> datasetsTokens = new ArrayList<>();
-    while (true) {
-
-      for (SearchHit hit : scrollResp.getHits().getHits()) {
-        Map<String, Object> result = hit.getSource();
-        String shortName = (String) result.get("Dataset-ShortName");
-
-        String filedStr = "";
-        int size = variables.size();
-        for (int i = 0; i < size; i++) {
-          String filed = variables.get(i);
-          Object filedValue = result.get(filed);
-
-          if (filedValue != null) {
-            filedStr = es.customAnalyzing(indexName, filedValue.toString());
-          }
-        }
-
-        datasetsTokens.add(new Tuple2<String, String>(shortName, filedStr));
-      }
-
-      scrollResp = 
es.getClient().prepareSearchScroll(scrollResp.getScrollId()).setScroll(new 
TimeValue(600000)).execute().actionGet();
-      if (scrollResp.getHits().getHits().length == 0) {
-        break;
-      }
-    }
-
-    return datasetsTokens;
-  }
-
-  public LabeledRowMatrix tFIDFTokens(JavaPairRDD<String, List<String>> 
datasetTokensRDD, SparkDriver spark) {
-
-    LabeledRowMatrix labelMatrix = 
MatrixUtil.createDocWordMatrix(datasetTokensRDD, spark.sc);
-
-    RowMatrix docwordMatrix = labelMatrix.rowMatrix;
-
-    RowMatrix docwordTFIDFMatrix = MatrixUtil.createTFIDFMatrix(docwordMatrix);
-
-    labelMatrix.rowMatrix = docwordTFIDFMatrix;
-
-    return labelMatrix;
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-sdap-mudrod/blob/39379fa9/core/src/main/java/gov/nasa/jpl/mudrod/recommendation/structure/RecomData.java
----------------------------------------------------------------------
diff --git 
a/core/src/main/java/gov/nasa/jpl/mudrod/recommendation/structure/RecomData.java
 
b/core/src/main/java/gov/nasa/jpl/mudrod/recommendation/structure/RecomData.java
deleted file mode 100644
index 9025156..0000000
--- 
a/core/src/main/java/gov/nasa/jpl/mudrod/recommendation/structure/RecomData.java
+++ /dev/null
@@ -1,196 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License"); you 
- * may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at
- * 
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package gov.nasa.jpl.mudrod.recommendation.structure;
-
-import com.google.gson.Gson;
-import com.google.gson.JsonElement;
-import com.google.gson.JsonObject;
-import gov.nasa.jpl.mudrod.discoveryengine.DiscoveryStepAbstract;
-import gov.nasa.jpl.mudrod.driver.ESDriver;
-import gov.nasa.jpl.mudrod.driver.SparkDriver;
-import gov.nasa.jpl.mudrod.main.MudrodEngine;
-import org.elasticsearch.action.search.SearchRequestBuilder;
-import org.elasticsearch.action.search.SearchResponse;
-import org.elasticsearch.index.query.QueryBuilders;
-import org.elasticsearch.search.SearchHit;
-import org.elasticsearch.search.sort.SortOrder;
-
-import java.io.IOException;
-import java.text.DecimalFormat;
-import java.util.*;
-
-/**
- * This class is used to test recommendation result similarity and 
session-level
- * similarity
- */
-public class RecomData extends DiscoveryStepAbstract {
-
-  /**
-   *
-   */
-  private static final long serialVersionUID = 1L;
-  protected transient List<LinkedTerm> termList = new ArrayList<>();
-  DecimalFormat df = new DecimalFormat("#.00");
-  protected static final String INDEX_NAME = "indexName";
-  private static final String WEIGHT = "weight";
-
-  class LinkedTerm {
-    public String term = null;
-    public double weight = 0;
-    public String model = null;
-
-    public LinkedTerm(String str, double w, String m) {
-      term = str;
-      weight = w;
-      model = m;
-    }
-  }
-
-  public RecomData(Properties props, ESDriver es, SparkDriver spark) {
-    super(props, es, spark);
-  }
-
-  @Override
-  public Object execute() {
-    return null;
-  }
-
-  @Override
-  public Object execute(Object o) {
-    return null;
-  }
-
-  public JsonObject getRecomDataInJson(String input, int num) {
-    String type = props.getProperty("metadataTermTFIDFSimType");
-    Map<String, Double> sortedOBSimMap = getRelatedData(type, input, num + 5);
-    JsonElement linkedJson = mapToJson(sortedOBSimMap, num);
-
-    // type = props.getProperty("metadataTermTFIDFSimType");
-    type = props.getProperty("metadataCodeSimType");
-
-    Map<String, Double> sortedMBSimMap = getRelatedData(type, input, num + 5);
-    JsonElement relatedJson = mapToJson(sortedMBSimMap, num);
-
-    JsonObject json = new JsonObject();
-
-    json.add("TFIDFSim", linkedJson);
-    json.add("TopicSim", relatedJson);
-
-    return json;
-  }
-
-  protected JsonElement mapToJson(Map<String, Double> wordweights, int num) {
-    Gson gson = new Gson();
-
-    List<JsonObject> nodes = new ArrayList<>();
-    Set<String> words = wordweights.keySet();
-    int i = 0;
-    for (String wordB : words) {
-      JsonObject node = new JsonObject();
-      node.addProperty("name", wordB);
-      node.addProperty("weight", wordweights.get(wordB));
-      nodes.add(node);
-
-      i += 1;
-      if (i >= num) {
-        break;
-      }
-    }
-
-    String nodesJson = gson.toJson(nodes);
-    JsonElement nodesElement = gson.fromJson(nodesJson, JsonElement.class);
-
-    return nodesElement;
-  }
-
-  public Map<String, Double> getRelatedData(String type, String input, int 
num) {
-    termList = new ArrayList<>();
-    Map<String, Double> termsMap = new HashMap<>();
-    Map<String, Double> sortedMap = new HashMap<>();
-    try {
-      List<LinkedTerm> links = getRelatedDataFromES(type, input, num);
-      int size = links.size();
-      for (int i = 0; i < size; i++) {
-        termsMap.put(links.get(i).term, links.get(i).weight);
-      }
-
-      sortedMap = sortMapByValue(termsMap); // terms_map will be empty
-    } catch (Exception e) {
-      e.printStackTrace();
-    }
-
-    return sortedMap;
-  }
-
-  public List<LinkedTerm> getRelatedDataFromES(String type, String input, int 
num) {
-    SearchRequestBuilder builder = 
es.getClient().prepareSearch(props.getProperty(INDEX_NAME)).setTypes(type).setQuery(QueryBuilders.termQuery("concept_A",
 input)).addSort(WEIGHT, SortOrder.DESC)
-        .setSize(num);
-
-    SearchResponse usrhis = builder.execute().actionGet();
-
-    for (SearchHit hit : usrhis.getHits().getHits()) {
-      Map<String, Object> result = hit.getSource();
-      String conceptB = (String) result.get("concept_B");
-
-      if (!conceptB.equals(input)) {
-        LinkedTerm lTerm = new LinkedTerm(conceptB, (double) 
result.get(WEIGHT), type);
-        termList.add(lTerm);
-      }
-    }
-
-    return termList;
-  }
-
-  public Map<String, Double> sortMapByValue(Map<String, Double> passedMap) {
-    List<String> mapKeys = new ArrayList<>(passedMap.keySet());
-    List<Double> mapValues = new ArrayList<>(passedMap.values());
-    Collections.sort(mapValues, Collections.reverseOrder());
-    Collections.sort(mapKeys, Collections.reverseOrder());
-
-    LinkedHashMap<String, Double> sortedMap = new LinkedHashMap<>();
-
-    Iterator<Double> valueIt = mapValues.iterator();
-    while (valueIt.hasNext()) {
-      Object val = valueIt.next();
-      Iterator<String> keyIt = mapKeys.iterator();
-
-      while (keyIt.hasNext()) {
-        Object key = keyIt.next();
-        String comp1 = passedMap.get(key).toString();
-        String comp2 = val.toString();
-
-        if (comp1.equals(comp2)) {
-          passedMap.remove(key);
-          mapKeys.remove(key);
-          sortedMap.put((String) key, (Double) val);
-          break;
-        }
-      }
-    }
-    return sortedMap;
-  }
-
-  public static void main(String[] args) throws IOException {
-
-    MudrodEngine me = new MudrodEngine();
-    Properties props = me.loadConfig();
-    ESDriver es = new ESDriver(me.getConfig());
-    RecomData test = new RecomData(props, es, null);
-
-    String input = "AQUARIUS_L3_SSS_SMIA_MONTHLY-CLIMATOLOGY_V4";
-    JsonObject json = test.getRecomDataInJson(input, 10);
-
-    System.out.println(json.toString());
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-sdap-mudrod/blob/39379fa9/core/src/main/java/gov/nasa/jpl/mudrod/recommendation/structure/package-info.java
----------------------------------------------------------------------
diff --git 
a/core/src/main/java/gov/nasa/jpl/mudrod/recommendation/structure/package-info.java
 
b/core/src/main/java/gov/nasa/jpl/mudrod/recommendation/structure/package-info.java
deleted file mode 100644
index 99199ca..0000000
--- 
a/core/src/main/java/gov/nasa/jpl/mudrod/recommendation/structure/package-info.java
+++ /dev/null
@@ -1,17 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License"); you 
- * may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at
- * 
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-/**
- * This package includes the data structure required by recommendation module.
- */
-package gov.nasa.jpl.mudrod.recommendation.structure;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-sdap-mudrod/blob/39379fa9/core/src/main/java/gov/nasa/jpl/mudrod/semantics/SVDAnalyzer.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/gov/nasa/jpl/mudrod/semantics/SVDAnalyzer.java 
b/core/src/main/java/gov/nasa/jpl/mudrod/semantics/SVDAnalyzer.java
deleted file mode 100644
index 3e63b04..0000000
--- a/core/src/main/java/gov/nasa/jpl/mudrod/semantics/SVDAnalyzer.java
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License"); you
- * may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package gov.nasa.jpl.mudrod.semantics;
-
-import gov.nasa.jpl.mudrod.driver.ESDriver;
-import gov.nasa.jpl.mudrod.driver.SparkDriver;
-import gov.nasa.jpl.mudrod.utils.MatrixUtil;
-import org.apache.spark.api.java.JavaPairRDD;
-import org.apache.spark.api.java.JavaRDD;
-import org.apache.spark.mllib.linalg.Vector;
-import org.apache.spark.mllib.linalg.distributed.RowMatrix;
-
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Properties;
-
-/**
- * ClassName: SVDAnalyzer Function: Analyze semantic relationship through SVD
- * method
- */
-public class SVDAnalyzer extends SemanticAnalyzer {
-
-  /**
-   * 
-   */
-  private static final long serialVersionUID = 1L;
-
-  /**
-   * Creates a new instance of SVDAnalyzer.
-   *
-   * @param props the Mudrod configuration
-   * @param es    the Elasticsearch drive
-   * @param spark the spark drive
-   */
-  public SVDAnalyzer(Properties props, ESDriver es, SparkDriver spark) {
-    super(props, es, spark);
-  }
-
-  /**
-   * GetSVDMatrix: Create SVD matrix csv file from original csv file.
-   *
-   * @param csvFileName       each row is a term, and each column is a 
document.
-   * @param svdDimention      Dimension of SVD matrix
-   * @param svdMatrixFileName CSV file name of SVD matrix
-   */
-  public void getSVDMatrix(String csvFileName, int svdDimention, String 
svdMatrixFileName) {
-
-    JavaPairRDD<String, Vector> importRDD = 
MatrixUtil.loadVectorFromCSV(spark, csvFileName, 1);
-    JavaRDD<Vector> vectorRDD = importRDD.values();
-    RowMatrix wordDocMatrix = new RowMatrix(vectorRDD.rdd());
-    RowMatrix tfidfMatrix = MatrixUtil.createTFIDFMatrix(wordDocMatrix);
-    RowMatrix svdMatrix = MatrixUtil.buildSVDMatrix(tfidfMatrix, svdDimention);
-
-    List<String> rowKeys = importRDD.keys().collect();
-    List<String> colKeys = new ArrayList<>();
-    for (int i = 0; i < svdDimention; i++) {
-      colKeys.add("dimension" + i);
-    }
-    MatrixUtil.exportToCSV(svdMatrix, rowKeys, colKeys, svdMatrixFileName);
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-sdap-mudrod/blob/39379fa9/core/src/main/java/gov/nasa/jpl/mudrod/semantics/SemanticAnalyzer.java
----------------------------------------------------------------------
diff --git 
a/core/src/main/java/gov/nasa/jpl/mudrod/semantics/SemanticAnalyzer.java 
b/core/src/main/java/gov/nasa/jpl/mudrod/semantics/SemanticAnalyzer.java
deleted file mode 100644
index be8b2b3..0000000
--- a/core/src/main/java/gov/nasa/jpl/mudrod/semantics/SemanticAnalyzer.java
+++ /dev/null
@@ -1,148 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License"); you
- * may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package gov.nasa.jpl.mudrod.semantics;
-
-import gov.nasa.jpl.mudrod.discoveryengine.MudrodAbstract;
-import gov.nasa.jpl.mudrod.driver.ESDriver;
-import gov.nasa.jpl.mudrod.driver.SparkDriver;
-import gov.nasa.jpl.mudrod.utils.LinkageTriple;
-import gov.nasa.jpl.mudrod.utils.MatrixUtil;
-import gov.nasa.jpl.mudrod.utils.SimilarityUtil;
-import org.apache.spark.api.java.JavaPairRDD;
-import org.apache.spark.api.java.JavaRDD;
-import org.apache.spark.mllib.linalg.Vector;
-import org.apache.spark.mllib.linalg.distributed.CoordinateMatrix;
-
-import java.io.File;
-import java.io.IOException;
-import java.util.List;
-import java.util.Properties;
-
-/**
- * ClassName: SemanticAnalyzer Function: Semantic analyzer
- */
-public class SemanticAnalyzer extends MudrodAbstract {
-
-  /**
-   *
-   */
-  private static final long serialVersionUID = 1L;
-
-  /**
-   * Creates a new instance of SemanticAnalyzer.
-   *
-   * @param props
-   *          the Mudrod configuration
-   * @param es
-   *          the Elasticsearch drive
-   * @param spark
-   *          the spark drive
-   */
-  public SemanticAnalyzer(Properties props, ESDriver es, SparkDriver spark) {
-    super(props, es, spark);
-  }
-
-  /**
-   * Calculate term similarity from CSV matrix.
-   *
-   * @param csvFileName
-   *          csv file of matrix, each row is a term, and each column is a
-   *          dimension in feature space
-   * @return Linkage triple list
-   */
-  public List<LinkageTriple> calTermSimfromMatrix(String csvFileName) {
-    File f = new File(csvFileName);
-    if (!f.exists()) {
-      return null;
-    }
-    return this.calTermSimfromMatrix(csvFileName, 1);
-  }
-
-  /**
-   * Calculate term similarity from CSV matrix.
-   *
-   * @param csvFileName csv file of matrix, each row is a term, and each 
column is a
-   *                    dimension in feature space
-   * @param skipRow number of rows to skip in input CSV file e.g. header
-   * @return Linkage triple list
-   */
-  public List<LinkageTriple> calTermSimfromMatrix(String csvFileName, int 
skipRow) {
-
-    JavaPairRDD<String, Vector> importRDD = 
MatrixUtil.loadVectorFromCSV(spark, csvFileName, skipRow);
-    if (importRDD == null || importRDD.values().first().size() == 0) {
-      return null;
-    }
-
-    CoordinateMatrix simMatrix = 
SimilarityUtil.calculateSimilarityFromVector(importRDD.values());
-    JavaRDD<String> rowKeyRDD = importRDD.keys();
-    return SimilarityUtil.matrixToTriples(rowKeyRDD, simMatrix);
-  }
-
-  /**
-   * Calculate term similarity from CSV matrix.
-   *
-   * @param csvFileName csv file of matrix, each row is a term, and each 
column is a
-   *                    dimension in feature space
-   * @param simType the type of similary calculation to execute e.g.
-   * <ul>
-   * <li>{@link gov.nasa.jpl.mudrod.utils.SimilarityUtil#SIM_COSINE} - 3,</li>
-   * <li>{@link gov.nasa.jpl.mudrod.utils.SimilarityUtil#SIM_HELLINGER} - 
2,</li>
-   * <li>{@link gov.nasa.jpl.mudrod.utils.SimilarityUtil#SIM_PEARSON} - 1</li>
-   * </ul>
-   * @param skipRow number of rows to skip in input CSV file e.g. header
-   * @return Linkage triple list
-   */
-  public List<LinkageTriple> calTermSimfromMatrix(String csvFileName, int 
simType, int skipRow) {
-
-    JavaPairRDD<String, Vector> importRDD = 
MatrixUtil.loadVectorFromCSV(spark, csvFileName, skipRow);
-    if (importRDD.values().first().size() == 0) {
-      return null;
-    }
-
-    JavaRDD<LinkageTriple> triples = 
SimilarityUtil.calculateSimilarityFromVector(importRDD, simType);
-
-    return triples.collect();
-  }
-
-  public void saveToES(List<LinkageTriple> tripleList, String index, String 
type) {
-    try {
-      LinkageTriple.insertTriples(es, tripleList, index, type);
-    } catch (IOException e) {
-      e.printStackTrace();
-    }
-  }
-
-  /**
-   * Method of saving linkage triples to Elasticsearch.
-   *
-   * @param tripleList
-   *          linkage triple list
-   * @param index
-   *          index name
-   * @param type
-   *          type name
-   * @param bTriple
-   *          bTriple
-   * @param bSymmetry
-   *          bSymmetry
-   */
-  public void saveToES(List<LinkageTriple> tripleList, String index, String 
type, boolean bTriple, boolean bSymmetry) {
-    try {
-      LinkageTriple.insertTriples(es, tripleList, index, type, bTriple, 
bSymmetry);
-    } catch (IOException e) {
-      e.printStackTrace();
-
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-sdap-mudrod/blob/39379fa9/core/src/main/java/gov/nasa/jpl/mudrod/semantics/package-info.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/gov/nasa/jpl/mudrod/semantics/package-info.java 
b/core/src/main/java/gov/nasa/jpl/mudrod/semantics/package-info.java
deleted file mode 100644
index 9c2e8ac..0000000
--- a/core/src/main/java/gov/nasa/jpl/mudrod/semantics/package-info.java
+++ /dev/null
@@ -1,18 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License"); you 
- * may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at
- * 
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-/**
- * This package includes SVD transformation function, methods of calculating
- * similarity from CSV, and saving triples into Elasticsearch
- */
-package gov.nasa.jpl.mudrod.semantics;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-sdap-mudrod/blob/39379fa9/core/src/main/java/gov/nasa/jpl/mudrod/ssearch/ClickstreamImporter.java
----------------------------------------------------------------------
diff --git 
a/core/src/main/java/gov/nasa/jpl/mudrod/ssearch/ClickstreamImporter.java 
b/core/src/main/java/gov/nasa/jpl/mudrod/ssearch/ClickstreamImporter.java
deleted file mode 100644
index 5cb130c..0000000
--- a/core/src/main/java/gov/nasa/jpl/mudrod/ssearch/ClickstreamImporter.java
+++ /dev/null
@@ -1,114 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License"); you 
- * may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at
- * 
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package gov.nasa.jpl.mudrod.ssearch;
-
-import gov.nasa.jpl.mudrod.discoveryengine.MudrodAbstract;
-import gov.nasa.jpl.mudrod.driver.ESDriver;
-import gov.nasa.jpl.mudrod.driver.SparkDriver;
-import gov.nasa.jpl.mudrod.main.MudrodConstants;
-
-import org.elasticsearch.action.index.IndexRequest;
-import org.elasticsearch.common.xcontent.XContentBuilder;
-
-import java.io.BufferedReader;
-import java.io.FileNotFoundException;
-import java.io.FileReader;
-import java.io.IOException;
-import java.util.Properties;
-
-import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
-
-/**
- * Supports ability to import click stream data into Elasticsearch
- * through .csv file
- */
-public class ClickstreamImporter extends MudrodAbstract {
-  /**
-   *
-   */
-  private static final long serialVersionUID = 1L;
-
-  public ClickstreamImporter(Properties props, ESDriver es, SparkDriver spark) 
{
-    super(props, es, spark);
-    addClickStreamMapping();
-  }
-
-  /**
-   * Method to add Elasticsearch mapping for click stream data
-   */
-  public void addClickStreamMapping() {
-    XContentBuilder Mapping;
-    try {
-      Mapping = jsonBuilder().startObject().startObject(
-              
props.getProperty(MudrodConstants.CLICK_STREAM_MATRIX_TYPE)).startObject(
-                      "properties").startObject("query").field("type", 
"string").field(
-                              "index", 
"not_analyzed").endObject().startObject("dataID").field(
-                                      "type", "string").field("index", 
"not_analyzed").endObject()
-
-          .endObject().endObject().endObject();
-
-      es.getClient().admin().indices().preparePutMapping(
-              props.getProperty(MudrodConstants.ES_INDEX_NAME)).setType(
-                      
props.getProperty(MudrodConstants.CLICK_STREAM_MATRIX_TYPE)).setSource(
-                              Mapping).execute().actionGet();
-    } catch (IOException e) {
-      e.printStackTrace();
-    }
-  }
-
-  /**
-   * Method to import click stream CSV into Elasticsearch
-   */
-  public void importfromCSVtoES() {
-    es.deleteType(props.getProperty(MudrodConstants.ES_INDEX_NAME), 
-            props.getProperty(MudrodConstants.CLICK_STREAM_MATRIX_TYPE));
-    es.createBulkProcessor();
-
-    BufferedReader br = null;
-    String cvsSplitBy = ",";
-
-    try {
-      br = new BufferedReader(new 
FileReader(props.getProperty("clickstreamMatrix")));
-      String line = br.readLine();
-      // first item needs to be skipped
-      String[] dataList = line.split(cvsSplitBy);
-      while ((line = br.readLine()) != null) {
-        String[] clicks = line.split(cvsSplitBy);
-        for (int i = 1; i < clicks.length; i++) {
-          if (!"0.0".equals(clicks[i])) {
-            IndexRequest ir = new 
IndexRequest(props.getProperty(MudrodConstants.ES_INDEX_NAME), 
-                    
props.getProperty(MudrodConstants.CLICK_STREAM_MATRIX_TYPE))
-                .source(jsonBuilder().startObject().field("query", 
clicks[0]).field(
-                        "dataID", dataList[i]).field("clicks", 
clicks[i]).endObject());
-            es.getBulkProcessor().add(ir);
-          }
-        }
-      }
-    } catch (FileNotFoundException e) {
-      e.printStackTrace();
-    } catch (IOException e) {
-      e.printStackTrace();
-    } finally {
-      if (br != null) {
-        try {
-          br.close();
-          es.destroyBulkProcessor();
-        } catch (IOException e) {
-          e.printStackTrace();
-        }
-      }
-    }
-  }
-
-}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-sdap-mudrod/blob/39379fa9/core/src/main/java/gov/nasa/jpl/mudrod/ssearch/Dispatcher.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/gov/nasa/jpl/mudrod/ssearch/Dispatcher.java 
b/core/src/main/java/gov/nasa/jpl/mudrod/ssearch/Dispatcher.java
deleted file mode 100644
index a0f3a2c..0000000
--- a/core/src/main/java/gov/nasa/jpl/mudrod/ssearch/Dispatcher.java
+++ /dev/null
@@ -1,128 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License"); you
- * may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package gov.nasa.jpl.mudrod.ssearch;
-
-import gov.nasa.jpl.mudrod.discoveryengine.MudrodAbstract;
-import gov.nasa.jpl.mudrod.driver.ESDriver;
-import gov.nasa.jpl.mudrod.driver.SparkDriver;
-import gov.nasa.jpl.mudrod.integration.LinkageIntegration;
-import org.elasticsearch.index.query.BoolQueryBuilder;
-import org.elasticsearch.index.query.MatchQueryBuilder;
-import org.elasticsearch.index.query.MultiMatchQueryBuilder;
-import org.elasticsearch.index.query.QueryBuilders;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.util.HashMap;
-import java.util.Map;
-import java.util.Map.Entry;
-import java.util.Properties;
-
-/**
- * Supports ability to transform regular user query into a semantic query
- */
-public class Dispatcher extends MudrodAbstract {
-  private static final Logger LOG = LoggerFactory.getLogger(Dispatcher.class);
-
-  public Dispatcher(Properties props, ESDriver es, SparkDriver spark) {
-    super(props, es, spark);
-  }
-
-  /**
-   * Method of getting semantically most related terms by number
-   *
-   * @param input regular input query
-   * @param num   the number of most related terms
-   * @return a map from term to similarity
-   */
-  public Map<String, Double> getRelatedTerms(String input, int num) {
-    LinkageIntegration li = new LinkageIntegration(props, this.es, null);
-    Map<String, Double> sortedMap = li.appyMajorRule(input);
-    Map<String, Double> selected_Map = new HashMap<>();
-    int count = 0;
-    for (Entry<String, Double> entry : sortedMap.entrySet()) {
-      if (count < num) {
-        selected_Map.put(entry.getKey(), entry.getValue());
-      }
-      count++;
-    }
-    return selected_Map;
-  }
-
-  /**
-   * Method of getting semantically most related terms by similarity threshold
-   *
-   * @param input regular input query
-   * @param T     value of threshold, raning from 0 to 1
-   * @return a map from term to similarity
-   */
-  public Map<String, Double> getRelatedTermsByT(String input, double T) {
-    LinkageIntegration li = new LinkageIntegration(this.props, this.es, null);
-    Map<String, Double> sortedMap = li.appyMajorRule(input);
-    Map<String, Double> selected_Map = new HashMap<>();
-
-    for (Entry<String, Double> entry : sortedMap.entrySet()) {
-      if (entry.getValue() >= T) {
-        selected_Map.put(entry.getKey(), entry.getValue());
-      }
-    }
-    return selected_Map;
-  }
-
-  /**
-   * Method of creating semantic query based on Threshold
-   *
-   * @param input          regular query
-   * @param T              threshold raning from 0 to 1
-   * @param query_operator query mode
-   * @return a multiMatch query builder
-   */
-  public BoolQueryBuilder createSemQuery(String input, double T, String 
query_operator) {
-    Map<String, Double> selected_Map = getRelatedTermsByT(input, T);
-    selected_Map.put(input, (double) 1);
-
-    String fieldsList[] = { "Dataset-Metadata", "Dataset-ShortName", 
"Dataset-LongName", 
-        "DatasetParameter-Topic", "DatasetParameter-VariableDetail", 
"DatasetParameter-Category",
-        "DatasetParameter-Variable", "DatasetParameter-Term",       
-        "DatasetSource-Source-LongName", "DatasetSource-Source-LongName-Full",
-        "DatasetSource-Source-ShortName", 
"DatasetSource-Source-ShortName-Full", 
-        "DatasetSource-Sensor-LongName", "DatasetSource-Sensor-LongName-Full", 
"DatasetSource-Sensor-ShortName",
-        "DatasetSource-Sensor-ShortName-Full" };
-    BoolQueryBuilder qb = new BoolQueryBuilder();
-    for (Entry<String, Double> entry : selected_Map.entrySet()) {
-      if (query_operator.toLowerCase().trim().equals("phrase")) {
-        qb.should(QueryBuilders.multiMatchQuery(entry.getKey(), 
fieldsList).boost(entry.getValue().floatValue()).type(MultiMatchQueryBuilder.Type.PHRASE).tieBreaker((float)
 0.5)); // when
-        // set
-        // to
-        // 1.0,
-        // it
-        // would
-        // be
-        // equal
-        // to
-        // "most
-        // fields"
-        // query
-      } else if (query_operator.toLowerCase().trim().equals("and")) {
-        qb.should(QueryBuilders.multiMatchQuery(entry.getKey(), 
fieldsList).boost(entry.getValue().floatValue()).operator(MatchQueryBuilder.DEFAULT_OPERATOR.AND).tieBreaker((float)
 0.5));
-      } else {
-        qb.should(QueryBuilders.multiMatchQuery(entry.getKey(), 
fieldsList).boost(entry.getValue().floatValue()).operator(MatchQueryBuilder.DEFAULT_OPERATOR.OR).tieBreaker((float)
 0.5));
-      }
-    }
-
-    // LOG.info(qb.toString());
-    return qb;
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-sdap-mudrod/blob/39379fa9/core/src/main/java/gov/nasa/jpl/mudrod/ssearch/Ranker.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/gov/nasa/jpl/mudrod/ssearch/Ranker.java 
b/core/src/main/java/gov/nasa/jpl/mudrod/ssearch/Ranker.java
deleted file mode 100644
index 32830d5..0000000
--- a/core/src/main/java/gov/nasa/jpl/mudrod/ssearch/Ranker.java
+++ /dev/null
@@ -1,192 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License"); you 
- * may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at
- * 
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package gov.nasa.jpl.mudrod.ssearch;
-
-import gov.nasa.jpl.mudrod.discoveryengine.MudrodAbstract;
-import gov.nasa.jpl.mudrod.driver.ESDriver;
-import gov.nasa.jpl.mudrod.driver.SparkDriver;
-import gov.nasa.jpl.mudrod.main.MudrodConstants;
-import gov.nasa.jpl.mudrod.ssearch.ranking.Learner;
-import gov.nasa.jpl.mudrod.ssearch.structure.SResult;
-import org.apache.spark.mllib.linalg.Vectors;
-import org.apache.spark.mllib.regression.LabeledPoint;
-
-import java.io.Serializable;
-import java.text.DecimalFormat;
-import java.util.*;
-
-/**
- * Supports the ability to calculate ranking scores
- */
-public class Ranker extends MudrodAbstract implements Serializable {
-  /**
-   *
-   */
-  private static final long serialVersionUID = 1L;
-  transient List<SResult> resultList = new ArrayList<>();
-
-  String learnerType = null;
-  Learner le = null;
-
-  public Ranker(Properties props, ESDriver es, SparkDriver spark, String 
learnerType) {
-    super(props, es, spark);
-    this.learnerType = learnerType;
-    le = new Learner(learnerType, spark, 
props.getProperty(MudrodConstants.SVM_SGD_MODEL));
-  }
-
-  /**
-   * Method of comparing results based on final score
-   */
-  public class ResultComparator implements Comparator<SResult> {
-    @Override
-    public int compare(SResult o1, SResult o2) {
-      return o2.below.compareTo(o1.below);
-    }
-  }
-
-  /**
-   * Method of calculating mean value
-   *
-   * @param attribute  the attribute name that need to be calculated on
-   * @param resultList an array list of result
-   * @return mean value
-   */
-  private double getMean(String attribute, List<SResult> resultList) {
-    double sum = 0.0;
-    for (SResult a : resultList) {
-      sum += (double) SResult.get(a, attribute);
-    }
-    return getNDForm(sum / resultList.size());
-  }
-
-  /**
-   * Method of calculating variance value
-   *
-   * @param attribute  the attribute name that need to be calculated on
-   * @param resultList an array list of result
-   * @return variance value
-   */
-  private double getVariance(String attribute, List<SResult> resultList) {
-    double mean = getMean(attribute, resultList);
-    double temp = 0.0;
-    double val;
-    for (SResult a : resultList) {
-      val = (Double) SResult.get(a, attribute);
-      temp += (mean - val) * (mean - val);
-    }
-
-    return getNDForm(temp / resultList.size());
-  }
-
-  /**
-   * Method of calculating standard deviation
-   *
-   * @param attribute  the attribute name that need to be calculated on
-   * @param resultList an array list of result
-   * @return standard deviation
-   */
-  private double getStdDev(String attribute, List<SResult> resultList) {
-    return getNDForm(Math.sqrt(getVariance(attribute, resultList)));
-  }
-
-  /**
-   * Method of calculating Z score
-   *
-   * @param val  the value of an attribute
-   * @param mean the mean value of an attribute
-   * @param std  the standard deviation of an attribute
-   * @return Z score
-   */
-  private double getZscore(double val, double mean, double std) {
-    if (!equalComp(std, 0)) {
-      return getNDForm((val - mean) / std);
-    } else {
-      return 0;
-    }
-  }
-
-  private boolean equalComp(double a, double b) {
-    return Math.abs(a - b) < 0.0001;
-  }
-
-  /**
-   * Get the first N decimals of a double value
-   *
-   * @param d double value that needs to be processed
-   * @return processed double value
-   */
-  private double getNDForm(double d) {
-    DecimalFormat ndForm = new DecimalFormat("#.###");
-    return Double.valueOf(ndForm.format(d));
-  }
-
-  /**
-   * Method of ranking a list of result
-   *
-   * @param resultList result list
-   * @return ranked result list
-   */
-  public List<SResult> rank(List<SResult> resultList) {
-    for (int i = 0; i < resultList.size(); i++) {
-      for (int m = 0; m < SResult.rlist.length; m++) {
-        String att = SResult.rlist[m].split("_")[0];
-        double val = SResult.get(resultList.get(i), att);
-        double mean = getMean(att, resultList);
-        double std = getStdDev(att, resultList);
-        double score = getZscore(val, mean, std);
-        String scoreId = SResult.rlist[m];
-        SResult.set(resultList.get(i), scoreId, score);
-      }
-    }
-
-    // using Collections.sort directly would cause a "not transitive" error
-    // this is because the training model is not an overfitting model
-    for (int j = 0; j < resultList.size(); j++) {
-      for (int k = 0; k < resultList.size(); k++) {
-        if (k != j) {
-          resultList.get(j).below += comp(resultList.get(j), 
resultList.get(k));
-        }
-      }
-    }
-
-    Collections.sort(resultList, new ResultComparator());
-    return resultList;
-  }
-
-  /**
-   * Method of comparing two search results
-   *
-   * @param o1 search result 1
-   * @param o2 search result 2
-   * @return 1 if o1 is greater than o2, 0 otherwise
-   */
-  public int comp(SResult o1, SResult o2) {
-    List<Double> instList = new ArrayList<>();
-    for (int i = 0; i < SResult.rlist.length; i++) {
-      double o2Score = SResult.get(o2, SResult.rlist[i]);
-      double o1Score = SResult.get(o1, SResult.rlist[i]);
-      instList.add(o2Score - o1Score);
-    }
-
-    double[] ins = instList.stream().mapToDouble(i -> i).toArray();
-    LabeledPoint insPoint = new LabeledPoint(99.0, Vectors.dense(ins));
-    double prediction = le.classify(insPoint);
-    if (equalComp(prediction, 1)) { //different from weka where the return 
value is 1 or 2
-      return 0;
-    } else {
-      return 1;
-    }
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-sdap-mudrod/blob/39379fa9/core/src/main/java/gov/nasa/jpl/mudrod/ssearch/Searcher.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/gov/nasa/jpl/mudrod/ssearch/Searcher.java 
b/core/src/main/java/gov/nasa/jpl/mudrod/ssearch/Searcher.java
deleted file mode 100644
index f407f92..0000000
--- a/core/src/main/java/gov/nasa/jpl/mudrod/ssearch/Searcher.java
+++ /dev/null
@@ -1,282 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License"); you
- * may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package gov.nasa.jpl.mudrod.ssearch;
-
-import com.google.gson.Gson;
-import com.google.gson.JsonElement;
-import com.google.gson.JsonObject;
-import gov.nasa.jpl.mudrod.discoveryengine.MudrodAbstract;
-import gov.nasa.jpl.mudrod.driver.ESDriver;
-import gov.nasa.jpl.mudrod.driver.SparkDriver;
-import gov.nasa.jpl.mudrod.ssearch.structure.SResult;
-
-import org.elasticsearch.action.search.SearchRequestBuilder;
-import org.elasticsearch.action.search.SearchResponse;
-import org.elasticsearch.index.query.BoolQueryBuilder;
-import org.elasticsearch.index.query.QueryBuilder;
-import org.elasticsearch.index.query.QueryBuilders;
-import org.elasticsearch.search.SearchHit;
-import org.elasticsearch.search.sort.SortBuilder;
-import org.elasticsearch.search.sort.SortOrder;
-
-import java.io.Serializable;
-import java.text.DecimalFormat;
-import java.text.SimpleDateFormat;
-import java.util.*;
-import java.util.regex.Pattern;
-
-/**
- * Supports the ability to perform semantic search with a given query
- */
-public class Searcher extends MudrodAbstract implements Serializable {
-  /**
-   * 
-   */
-  private static final long serialVersionUID = 1L;
-  DecimalFormat NDForm = new DecimalFormat("#.##");
-  final Integer MAX_CHAR = 700;
-
-  public Searcher(Properties props, ESDriver es, SparkDriver spark) {
-    super(props, es, spark);
-  }
-
-  /**
-   * Method of converting processing level string into a number
-   *
-   * @param pro processing level string
-   * @return processing level number
-   */
-  public Double getProLevelNum(String pro) {
-    if (pro == null) {
-      return 1.0;
-    }
-    Double proNum;
-    Pattern p = Pattern.compile(".*[a-zA-Z].*");
-    if (pro.matches("[0-9]{1}[a-zA-Z]{1}")) {
-      proNum = Double.parseDouble(pro.substring(0, 1));
-    } else if (p.matcher(pro).find()) {
-      proNum = 1.0;
-    } else {
-      proNum = Double.parseDouble(pro);
-    }
-
-    return proNum;
-  }
-
-  public Double getPop(Double pop) {
-    if (pop > 1000) {
-      pop = 1000.0;
-    }
-    return pop;
-  }
-
-  /**
-   * Method of checking if query exists in a certain attribute
-   *
-   * @param strList attribute value in the form of ArrayList
-   * @param query   query string
-   * @return 1 means query exists, 0 otherwise
-   */
-  public Double exists(ArrayList<String> strList, String query) {
-    Double val = 0.0;
-    if (strList != null) {
-      String str = String.join(", ", strList);
-      if (str != null && str.length() != 0 && 
str.toLowerCase().trim().contains(query)) {
-        val = 1.0;
-      }
-    }
-    return val;
-  }
-
-  /**
-   * Main method of semantic search
-   *
-   * @param index          index name in Elasticsearch
-   * @param type           type name in Elasticsearch
-   * @param query          regular query string
-   * @param queryOperator query mode- query, or, and
-   * @return a list of search result
-   */
-  @SuppressWarnings("unchecked")
-  public List<SResult> searchByQuery(String index, String type, String query, 
String queryOperator, String rankOption) {
-    boolean exists = 
es.getClient().admin().indices().prepareExists(index).execute().actionGet().isExists();
-    if (!exists) {
-      return new ArrayList<>();
-    }
-
-    SortOrder order = null;
-    String sortFiled = "";
-    switch (rankOption) {
-    case "Rank-AllTimePopularity":
-      sortFiled = "Dataset-AllTimePopularity";
-      order = SortOrder.DESC;
-      break;
-    case "Rank-MonthlyPopularity":
-      sortFiled = "Dataset-MonthlyPopularity";
-      order = SortOrder.DESC;
-      break;
-    case "Rank-UserPopularity":
-      sortFiled = "Dataset-UserPopularity";
-      order = SortOrder.DESC;
-      break;
-    case "Rank-LongName-Full":
-      sortFiled = "Dataset-LongName.raw";
-      order = SortOrder.ASC;
-      break;
-    case "Rank-ShortName-Full":
-      sortFiled = "Dataset-ShortName.raw";
-      order = SortOrder.ASC;
-      break;
-    case "Rank-GridSpatialResolution":
-      sortFiled = "Dataset-GridSpatialResolution";
-      order = SortOrder.DESC;
-      break;
-    case "Rank-SatelliteSpatialResolution":
-      sortFiled = "Dataset-SatelliteSpatialResolution";
-      order = SortOrder.DESC;
-      break;
-    case "Rank-StartTimeLong-Long":
-      sortFiled = "DatasetCoverage-StartTimeLong-Long";
-      order = SortOrder.ASC;
-      break;
-    case "Rank-StopTimeLong-Long":
-      sortFiled = "DatasetCoverage-StopTimeLong-Long";
-      order = SortOrder.DESC;
-      break;
-    default:
-      sortFiled = "Dataset-ShortName.raw";
-      order = SortOrder.ASC;
-      break;
-    }
-
-    Dispatcher dp = new Dispatcher(this.getConfig(), this.getES(), null);
-    BoolQueryBuilder qb = dp.createSemQuery(query, 1.0, queryOperator);
-    List<SResult> resultList = new ArrayList<>();
-
-    SearchRequestBuilder builder = 
es.getClient().prepareSearch(index).setTypes(type).setQuery(qb).addSort(sortFiled,
 order).setSize(500).setTrackScores(true);
-    SearchResponse response = builder.execute().actionGet();
-
-    for (SearchHit hit : response.getHits().getHits()) {
-      Map<String, Object> result = hit.getSource();
-      Double relevance = Double.valueOf(NDForm.format(hit.getScore()));
-      String shortName = (String) result.get("Dataset-ShortName");
-      String longName = (String) result.get("Dataset-LongName");
-
-      ArrayList<String> topicList = (ArrayList<String>) 
result.get("DatasetParameter-Variable");
-      String topic = "";
-      if (null != topicList) {
-        topic = String.join(", ", topicList);
-      }
-      String content = (String) result.get("Dataset-Description");
-
-      if (!"".equals(content)) {
-        int maxLength = (content.length() < MAX_CHAR) ? content.length() : 
MAX_CHAR;
-        content = content.trim().substring(0, maxLength - 1) + "...";
-      }
-
-      ArrayList<String> longdate = (ArrayList<String>) 
result.get("DatasetCitation-ReleaseDateLong");
-      Date date = new Date(Long.valueOf(longdate.get(0)).longValue());
-      SimpleDateFormat df2 = new SimpleDateFormat("MM/dd/yyyy");
-      String dateText = df2.format(date);
-
-      // start date
-      Long start = (Long) result.get("DatasetCoverage-StartTimeLong-Long");
-      Date startDate = new Date(start);
-      String startDateTxt = df2.format(startDate);
-
-      // end date
-      String end = (String) result.get("Dataset-DatasetCoverage-StopTimeLong");
-      String endDateTxt = "";
-      if ("".equals(end)) {
-        endDateTxt = "Present";
-      } else {
-        Date endDate = new Date(Long.valueOf(end));
-        endDateTxt = df2.format(endDate);
-      }
-
-      String processingLevel = (String) result.get("Dataset-ProcessingLevel");
-      Double proNum = getProLevelNum(processingLevel);
-
-      Double userPop = getPop(((Integer) 
result.get("Dataset-UserPopularity")).doubleValue());
-      Double allPop = getPop(((Integer) 
result.get("Dataset-AllTimePopularity")).doubleValue());
-      Double monthPop = getPop(((Integer) 
result.get("Dataset-MonthlyPopularity")).doubleValue());
-
-      List<String> sensors = (List<String>) 
result.get("DatasetSource-Sensor-ShortName");
-
-      SResult re = new SResult(shortName, longName, topic, content, dateText);
-
-      SResult.set(re, "term", relevance);
-      SResult.set(re, "releaseDate", 
Long.valueOf(longdate.get(0)).doubleValue());
-      SResult.set(re, "processingLevel", processingLevel);
-      SResult.set(re, "processingL", proNum);
-      SResult.set(re, "userPop", userPop);
-      SResult.set(re, "allPop", allPop);
-      SResult.set(re, "monthPop", monthPop);
-      SResult.set(re, "startDate", startDateTxt);
-      SResult.set(re, "endDate", endDateTxt);
-      SResult.set(re, "sensors", String.join(", ", sensors));
-
-      QueryBuilder queryLabelSearch = 
QueryBuilders.boolQuery().must(QueryBuilders.termQuery("query", 
query)).must(QueryBuilders.termQuery("dataID", shortName));
-      SearchResponse labelRes = 
es.getClient().prepareSearch(index).setTypes("trainingranking").setQuery(queryLabelSearch).setSize(5).execute().actionGet();
-      String labelString = null;
-      for (SearchHit label : labelRes.getHits().getHits()) {
-        Map<String, Object> labelItem = label.getSource();
-        labelString = (String) labelItem.get("label");
-      }
-      SResult.set(re, "label", labelString);
-      resultList.add(re);
-    }
-
-    return resultList;
-  }
-
-  /**
-   * Method of semantic search to generate JSON string
-   *
-   * @param index          index name in Elasticsearch
-   * @param type           type name in Elasticsearch
-   * @param query          regular query string
-   * @param queryOperator query mode- query, or, and
-   * @param rr             selected ranking method
-   * @return search results
-   */
-  public String ssearch(String index, String type, String query, String 
queryOperator, String rankOption, Ranker rr) {
-    List<SResult> li = searchByQuery(index, type, query, queryOperator, 
rankOption);
-    if ("Rank-SVM".equals(rankOption)) {
-      li = rr.rank(li);
-    }
-    Gson gson = new Gson();
-    List<JsonObject> fileList = new ArrayList<>();
-
-    for (int i = 0; i < li.size(); i++) {
-      JsonObject file = new JsonObject();
-      file.addProperty("Short Name", (String) SResult.get(li.get(i), 
"shortName"));
-      file.addProperty("Long Name", (String) SResult.get(li.get(i), 
"longName"));
-      file.addProperty("Topic", (String) SResult.get(li.get(i), "topic"));
-      file.addProperty("Description", (String) SResult.get(li.get(i), 
"description"));
-      file.addProperty("Release Date", (String) SResult.get(li.get(i), 
"relase_date"));
-      fileList.add(file);
-
-      file.addProperty("Start/End Date", (String) SResult.get(li.get(i), 
"startDate") + " - " + (String) SResult.get(li.get(i), "endDate"));
-      file.addProperty("Processing Level", (String) SResult.get(li.get(i), 
"processingLevel"));
-
-      file.addProperty("Sensor", (String) SResult.get(li.get(i), "sensors"));
-    }
-    JsonElement fileListElement = gson.toJsonTree(fileList);
-
-    JsonObject pDResults = new JsonObject();
-    pDResults.add("PDResults", fileListElement);
-    return pDResults.toString();
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-sdap-mudrod/blob/39379fa9/core/src/main/java/gov/nasa/jpl/mudrod/ssearch/package-info.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/gov/nasa/jpl/mudrod/ssearch/package-info.java 
b/core/src/main/java/gov/nasa/jpl/mudrod/ssearch/package-info.java
deleted file mode 100644
index da6bea3..0000000
--- a/core/src/main/java/gov/nasa/jpl/mudrod/ssearch/package-info.java
+++ /dev/null
@@ -1,18 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License"); you 
- * may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at
- * 
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-/**
- * This package includes classes for semantic search, such as click stream 
importer,
- * query dispatcher, semantic searcher, and ranker (ranksvm, ordinal/linear 
regression)
- */
-package gov.nasa.jpl.mudrod.ssearch;
\ No newline at end of file

[14/17] incubator-sdap-mudrod git commit: SDAP-7 Change all package namespaces to org.apache.sdap

Reply via email to