[14/21] incubator-sdap-mudrod git commit: SDAP-1 Import all code under the SDAP SGA

lewismc Fri, 27 Oct 2017 15:35:01 -0700

http://git-wip-us.apache.org/repos/asf/incubator-sdap-mudrod/blob/7b76fa16/core/src/main/java/gov/nasa/jpl/mudrod/weblog/structure/RequestUrl.java
----------------------------------------------------------------------
diff --git 
a/core/src/main/java/gov/nasa/jpl/mudrod/weblog/structure/RequestUrl.java 
b/core/src/main/java/gov/nasa/jpl/mudrod/weblog/structure/RequestUrl.java
new file mode 100644
index 0000000..bbfb79c
--- /dev/null
+++ b/core/src/main/java/gov/nasa/jpl/mudrod/weblog/structure/RequestUrl.java
@@ -0,0 +1,294 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License"); you
+ * may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package gov.nasa.jpl.mudrod.weblog.structure;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.UnsupportedEncodingException;
+import java.net.URLDecoder;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * ClassName: RequestUrl Function: request url relate operations
+ */
+public class RequestUrl {
+
+  private static final Logger LOG = LoggerFactory.getLogger(RequestUrl.class);
+
+  /**
+   * Default Constructor
+   */
+  public RequestUrl() {
+    /* Default Constructor */
+  }
+
+  /**
+   * UrlPage: Get url page from url link
+   *
+   * @param strURL request url
+   * @return page name
+   */
+  public static String urlPage(String strURL) {
+    String strPage = null;
+    String[] arrSplit = null;
+
+    String newURL = strURL.trim().toLowerCase();
+
+    arrSplit = newURL.split("[?]");
+    if (newURL.length() > 0 && arrSplit.length > 1 && arrSplit[0] != null) {
+      strPage = arrSplit[0];
+    }
+
+    return strPage;
+  }
+
+  /**
+   * TruncateUrlPage: Get url params from url link
+   *
+   * @param strURL
+   * @return url params
+   */
+  private static String truncateUrlPage(String strURL) {
+    String strAllParam = null;
+    String[] arrSplit = null;
+
+    strURL = strURL.trim().toLowerCase(); // keep this in mind
+
+    arrSplit = strURL.split("[?]");
+    if (strURL.length() > 1) {
+      if (arrSplit.length > 1) {
+        if (arrSplit[1] != null) {
+          strAllParam = arrSplit[1];
+        }
+      }
+    }
+
+    return strAllParam;
+  }
+
+  /**
+   * URLRequest: Get url params from url link in a map format
+   *
+   * @param URL request url
+   * @return url params key value map
+   */
+  public static Map<String, String> uRLRequest(String URL) {
+    Map<String, String> mapRequest = new HashMap<String, String>();
+
+    String[] arrSplit = null;
+
+    String strUrlParam = truncateUrlPage(URL);
+    if (strUrlParam == null) {
+      return mapRequest;
+    }
+
+    arrSplit = strUrlParam.split("[&]");
+    for (String strSplit : arrSplit) {
+      String[] arrSplitEqual = null;
+      arrSplitEqual = strSplit.split("[=]");
+
+      if (arrSplitEqual.length > 1) {
+
+        mapRequest.put(arrSplitEqual[0], arrSplitEqual[1]);
+
+      } else {
+        if (arrSplitEqual[0] != "") {
+
+          mapRequest.put(arrSplitEqual[0], "");
+        }
+      }
+    }
+    return mapRequest;
+  }
+
+  /**
+   * GetSearchInfo: Get search information from url link
+   *
+   * @param URL request url
+   * @return search params
+   * @throws UnsupportedEncodingException UnsupportedEncodingException
+   */
+  public String getSearchInfo(String URL) throws UnsupportedEncodingException {
+    List<String> info = new ArrayList<String>();
+    String keyword = "";
+    Map<String, String> mapRequest = RequestUrl.uRLRequest(URL);
+    if (mapRequest.get("search") != null) {
+      try {
+        keyword = mapRequest.get("search");
+
+        keyword = URLDecoder.decode(keyword.replaceAll("%(?![0-9a-fA-F]{2})", 
"%25"), "UTF-8");
+        if (keyword.contains("%2b") || keyword.contains("%20") || 
keyword.contains("%25")) {
+          keyword = keyword.replace("%2b", " ");
+          keyword = keyword.replace("%20", " ");
+          keyword = keyword.replace("%25", " ");
+        }
+
+        keyword = keyword.replaceAll("[-+^:,*_\"]", " ").replace("\\", " 
").replaceAll("\\s+", " ").trim();
+
+      } catch (UnsupportedEncodingException e) {
+        LOG.error(mapRequest.get("search"));
+        e.printStackTrace();
+      }
+      if (!"".equals(keyword)) {
+        info.add(keyword);
+      }
+
+    }
+
+    if (mapRequest.get("ids") != null && mapRequest.get("values") != null) {
+      String id_raw = URLDecoder.decode(mapRequest.get("ids"), "UTF-8");
+      String value_raw = URLDecoder.decode(mapRequest.get("values"), "UTF-8");
+      String[] ids = id_raw.split(":");
+      String[] values = value_raw.split(":");
+
+      int a = ids.length;
+      int b = values.length;
+      int l = a < b ? a : b;
+
+      for (int i = 0; i < l; i++) {
+        if (ids[i].equals("collections") || ids[i].equals("measurement") || 
ids[i].equals("sensor") || ids[i].equals("platform") || 
ids[i].equals("variable") || ids[i].equals("spatialcoverage")) {
+          try {
+            values[i] = values[i].replaceAll("%(?![0-9a-fA-F]{2})", "%25");
+            if (!URLDecoder.decode(values[i], "UTF-8").equals(keyword) && 
!URLDecoder.decode(values[i], "UTF-8").equals("")) {
+              String item = URLDecoder.decode(values[i], "UTF-8").trim();
+              if (item.contains("%2b") || item.contains("%20") || 
item.contains("%25")) {
+                item = item.replace("%2b", " ");
+                item = item.replace("%20", " ");
+                item = item.replace("%25", " ");
+              }
+              item = item.replaceAll("[-+^:,*_\"]", " ").replace("\\", " 
").replaceAll("\\s+", " ").trim();
+              info.add(item);
+            }
+          } catch (Exception e) {
+            LOG.error(values[i]);
+            e.printStackTrace();
+          }
+        }
+
+      }
+    }
+
+    return String.join(",", info);
+  }
+
+  /**
+   * GetSearchWord: Get search words from url link
+   *
+   * @param url request url
+   * @return query
+   */
+  public static String getSearchWord(String url) {
+    String keyword = "";
+
+    Map<String, String> mapRequest = RequestUrl.uRLRequest(url);
+    if (mapRequest.get("search") != null) {
+      try {
+        keyword = mapRequest.get("search");
+
+        keyword = URLDecoder.decode(keyword.replaceAll("%(?![0-9a-fA-F]{2})", 
"%25"), "UTF-8");
+        if (keyword.contains("%2b") || keyword.contains("%20") || 
keyword.contains("%25")) {
+          keyword = keyword.replace("%2b", " ");
+          keyword = keyword.replace("%20", " ");
+          keyword = keyword.replace("%25", " ");
+        }
+        keyword = keyword.replaceAll("[-+^:,*_\"]", " ").replace("\\", " 
").replaceAll("\\s+", " ").trim();
+      } catch (UnsupportedEncodingException e) {
+        LOG.error(mapRequest.get("search"));
+        e.printStackTrace();
+      }
+    }
+
+    return keyword;
+  }
+
+  /**
+   * GetFilterInfo: Get filter params from url link
+   *
+   * @param url request url
+   * @return filter facet key pair map
+   * @throws UnsupportedEncodingException UnsupportedEncodingException
+   */
+  public static Map<String, String> getFilterInfo(String url) throws 
UnsupportedEncodingException {
+    List<String> info = new ArrayList<>();
+    Map<String, String> filterValues = new HashMap<>();
+
+    String keyword = "";
+    Map<String, String> mapRequest = RequestUrl.uRLRequest(url);
+    if (mapRequest.get("search") != null) {
+      try {
+        keyword = mapRequest.get("search");
+
+        keyword = URLDecoder.decode(keyword.replaceAll("%(?![0-9a-fA-F]{2})", 
"%25"), "UTF-8");
+        if (keyword.contains("%2b") || keyword.contains("%20") || 
keyword.contains("%25")) {
+          keyword = keyword.replace("%2b", " ");
+          keyword = keyword.replace("%20", " ");
+          keyword = keyword.replace("%25", " ");
+        }
+        keyword = keyword.replaceAll("[-+^:,*_\"]", " ").replace("\\", " 
").replaceAll("\\s+", " ").trim();
+
+      } catch (UnsupportedEncodingException e) {
+        LOG.error(mapRequest.get("search"));
+        e.printStackTrace();
+      }
+      if (!"".equals(keyword)) {
+        info.add(keyword);
+      }
+
+    }
+
+    if (mapRequest.get("ids") != null && mapRequest.get("values") != null) {
+      String idRaw = URLDecoder.decode(mapRequest.get("ids"), "UTF-8");
+      String valueRaw = URLDecoder.decode(mapRequest.get("values"), "UTF-8");
+      String[] ids = idRaw.split(":");
+      String[] values = valueRaw.split(":");
+
+      int a = ids.length;
+      int b = values.length;
+      int l = a < b ? a : b;
+
+      for (int i = 0; i < l; i++) {
+        try {
+          values[i] = values[i].replaceAll("%(?![0-9a-fA-F]{2})", "%25");
+          if (!URLDecoder.decode(values[i], "UTF-8").equals(keyword) && 
!URLDecoder.decode(values[i], "UTF-8").equals("")) {
+            String item = URLDecoder.decode(values[i], "UTF-8").trim();
+            if (item.contains("%2b") || item.contains("%20") || 
item.contains("%25")) {
+              item = item.replace("%2b", " ");
+              item = item.replace("%20", " ");
+              item = item.replace("%25", " ");
+            }
+            item = item.replaceAll("[-+^:,*_\"]", " ").replace("\\", " 
").replaceAll("\\s+", " ").trim();
+            filterValues.put(ids[i], item);
+          }
+        } catch (Exception e) {
+          LOG.error(values[i]);
+          e.printStackTrace();
+        }
+      }
+    }
+
+    if (mapRequest.get("temporalsearch") != null) {
+      String temporalsearch = mapRequest.get("temporalsearch");
+      temporalsearch = 
URLDecoder.decode(temporalsearch.replaceAll("%(?![0-9a-fA-F]{2})", "%25"), 
"UTF-8");
+
+      filterValues.put("temporalsearch", temporalsearch);
+    }
+
+    return filterValues;
+  }
+
+}


http://git-wip-us.apache.org/repos/asf/incubator-sdap-mudrod/blob/7b76fa16/core/src/main/java/gov/nasa/jpl/mudrod/weblog/structure/Session.java
----------------------------------------------------------------------
diff --git 
a/core/src/main/java/gov/nasa/jpl/mudrod/weblog/structure/Session.java 
b/core/src/main/java/gov/nasa/jpl/mudrod/weblog/structure/Session.java
new file mode 100644
index 0000000..93f4288
--- /dev/null
+++ b/core/src/main/java/gov/nasa/jpl/mudrod/weblog/structure/Session.java
@@ -0,0 +1,287 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License"); you
+ * may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package gov.nasa.jpl.mudrod.weblog.structure;
+
+import com.google.gson.Gson;
+import com.google.gson.JsonElement;
+import com.google.gson.JsonObject;
+import gov.nasa.jpl.mudrod.driver.ESDriver;
+import org.elasticsearch.action.search.SearchResponse;
+import org.elasticsearch.index.query.QueryBuilders;
+import org.elasticsearch.search.SearchHit;
+import org.elasticsearch.search.sort.SortOrder;
+import org.joda.time.Seconds;
+import org.joda.time.format.DateTimeFormatter;
+import org.joda.time.format.ISODateTimeFormat;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.UnsupportedEncodingException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+
+/**
+ * ClassName: Session Function: Session operations.
+ */
+public class Session /*extends MudrodAbstract*/ implements Comparable<Session> 
{
+  private static final Logger LOG = LoggerFactory.getLogger(Session.class);
+  // start: start time of session
+  private String start;
+  // end: end time of session
+  private String end;
+  // id: original session ID
+  private String id;
+  // newid: new session ID
+  private String newid = null;
+  // fmt: time formatter
+  private DateTimeFormatter fmt = ISODateTimeFormat.dateTime();
+
+  private ESDriver es;
+  private Properties props;
+
+  /**
+   * Creates a new instance of Session.
+   *
+   * @param props the Mudrod configuration
+   * @param es    the Elasticsearch drive
+   * @param start start time of session
+   * @param end   end time of session
+   * @param id    session ID
+   */
+  public Session(Properties props, ESDriver es, String start, String end, 
String id) {
+    this.start = start;
+    this.end = end;
+    this.id = id;
+
+    this.props = props;
+    this.es = es;
+  }
+
+  /**
+   * Creates a new instance of Session.
+   *
+   * @param props the Mudrod configuration
+   * @param es    the Elasticsearch drive
+   */
+  public Session(Properties props, ESDriver es) {
+    this.props = props;
+    this.es = es;
+  }
+
+  /**
+   * getID: Get original session ID
+   *
+   * @return session id
+   */
+  public String getID() {
+    return id;
+  }
+
+  /**
+   * getNewID: Get new session ID
+   *
+   * @return new session id
+   */
+  public String getNewID() {
+    return newid;
+  }
+
+  /**
+   * setNewID: Set new session ID
+   *
+   * @param str: session ID
+   * @return new session id
+   */
+  public String setNewID(String str) {
+    return newid = str;
+  }
+
+  /**
+   * getStartTime:Get start time of current session
+   *
+   * @return start time of session
+   */
+  public String getStartTime() {
+    return start;
+  }
+
+  /**
+   * getEndTime:Get end time of current session
+   *
+   * @return end time of session
+   */
+  public String getEndTime() {
+    return end;
+  }
+
+  /**
+   * Compare current session with another session
+   *
+   * @see java.lang.Comparable#compareTo(java.lang.Object)
+   */
+  @Override
+  public int compareTo(Session o) {
+    fmt.parseDateTime(this.end);
+    fmt.parseDateTime(o.end);
+    // ascending order
+    return Seconds.secondsBetween(fmt.parseDateTime(o.end), 
fmt.parseDateTime(this.end)).getSeconds();
+
+  }
+
+  /**
+   * getSessionDetail:Get detail of current session, which is used for session
+   * tree reconstruct
+   *
+   * @param indexName    name of index from which you wish to obtain session 
detail.
+   * @param type: Session type name in Elasticsearch
+   * @param sessionID:   Session ID
+   * @return Session details in Json format
+   */
+  public JsonObject getSessionDetail(String indexName, String type, String 
sessionID) {
+    JsonObject sessionResults = new JsonObject();
+    // for session tree
+    SessionTree tree = null;
+    JsonElement jsonRequest = null;
+    try {
+      tree = this.getSessionTree(indexName, type, sessionID);
+      JsonObject jsonTree = tree.treeToJson(tree.root);
+      sessionResults.add("treeData", jsonTree);
+
+      jsonRequest = this.getRequests(type, sessionID);
+      sessionResults.add("RequestList", jsonRequest);
+    } catch (UnsupportedEncodingException e) {
+      LOG.error("Encoding error detected.", e);
+
+    }
+
+    return sessionResults;
+  }
+
+  /**
+   * getClickStreamList: Extracted click stream list from current session.
+   *
+   * @param indexName    an index from which to query for a session list
+   * @param type: Session type name in Elasticsearch
+   * @param sessionID:   Session ID
+   * @return Click stram data list
+   * {@link ClickStream}
+   */
+  public List<ClickStream> getClickStreamList(String indexName, String type, 
String sessionID) {
+    SessionTree tree = null;
+    try {
+      tree = this.getSessionTree(indexName, type, sessionID);
+    } catch (UnsupportedEncodingException e) {
+      LOG.error("Erro whilst obtaining the Session Tree: {}", e);
+    }
+
+    List<ClickStream> clickthroughs = tree.getClickStreamList();
+    return clickthroughs;
+  }
+
+  /**
+   * Method of converting a given session to a tree structure
+   *
+   * @param type session type name in Elasticsearch
+   * @param sessionID   ID of session
+   * @return an instance of session tree structure
+   * @throws UnsupportedEncodingException UnsupportedEncodingException
+   */
+  private SessionTree getSessionTree(String indexName, String type, String 
sessionID) throws UnsupportedEncodingException {
+
+    SearchResponse response = 
es.getClient().prepareSearch(indexName).setTypes(type).setQuery(QueryBuilders.termQuery("SessionID",
 sessionID)).setSize(100).addSort("Time", SortOrder.ASC)
+        .execute().actionGet();
+
+    SessionTree tree = new SessionTree(this.props, this.es, sessionID, type);
+    int seq = 1;
+    for (SearchHit hit : response.getHits().getHits()) {
+      Map<String, Object> result = hit.getSource();
+      String request = (String) result.get("Request");
+      String time = (String) result.get("Time");
+      String logType = (String) result.get("LogType");
+      String referer = (String) result.get("Referer");
+
+      SessionNode node = new SessionNode(request, logType, referer, time, seq);
+      tree.insert(node);
+      seq++;
+    }
+
+    return tree;
+  }
+
+  /**
+   * Method of getting all requests from a given current session
+   *
+   * @param cleanuptype Session type name in Elasticsearch
+   * @param sessionID   Session ID
+   * @return all of these requests in JSON
+   * @throws UnsupportedEncodingException UnsupportedEncodingException
+   */
+  private JsonElement getRequests(String cleanuptype, String sessionID) throws 
UnsupportedEncodingException {
+    SearchResponse response = 
es.getClient().prepareSearch(props.getProperty("indexName")).setTypes(cleanuptype).setQuery(QueryBuilders.termQuery("SessionID",
 sessionID)).setSize(100)
+        .addSort("Time", SortOrder.ASC).execute().actionGet();
+
+    Gson gson = new Gson();
+    List<JsonObject> requestList = new ArrayList<>();
+    int seq = 1;
+    for (SearchHit hit : response.getHits().getHits()) {
+      Map<String, Object> result = hit.getSource();
+      String request = (String) result.get("Request");
+      String requestUrl = (String) result.get("RequestUrl");
+      String time = (String) result.get("Time");
+      String logType = (String) result.get("LogType");
+      String referer = (String) result.get("Referer");
+
+      JsonObject req = new JsonObject();
+      req.addProperty("Time", time);
+      req.addProperty("Request", request);
+      req.addProperty("RequestURL", requestUrl);
+      req.addProperty("LogType", logType);
+      req.addProperty("Referer", referer);
+      req.addProperty("Seq", seq);
+      requestList.add(req);
+
+      seq++;
+    }
+    return gson.toJsonTree(requestList);
+  }
+
+  /**
+   * getClickStreamList: Extracted ranking training data from current session.
+   *
+   * @param indexName    an index from which to obtain ranked training data.
+   * @param cleanuptype: Session type name in Elasticsearch
+   * @param sessionID:   Session ID
+   * @return Click stram data list
+   * {@link ClickStream}
+   */
+  public List<RankingTrainData> getRankingTrainData(String indexName, String 
cleanuptype, String sessionID) {
+    SessionTree tree = null;
+    try {
+      tree = this.getSessionTree(indexName, cleanuptype, sessionID);
+    } catch (UnsupportedEncodingException e) {
+      LOG.error("Error whilst retreiving Session Tree: {}", e);
+    }
+
+    List<RankingTrainData> trainData = new ArrayList<>();
+    try {
+      trainData = tree.getRankingTrainData(indexName, sessionID);
+    } catch (UnsupportedEncodingException e) {
+      LOG.error("Error whilst retreiving ranking training data: {}", e);
+    }
+
+    return trainData;
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-sdap-mudrod/blob/7b76fa16/core/src/main/java/gov/nasa/jpl/mudrod/weblog/structure/SessionExtractor.java
----------------------------------------------------------------------
diff --git 
a/core/src/main/java/gov/nasa/jpl/mudrod/weblog/structure/SessionExtractor.java 
b/core/src/main/java/gov/nasa/jpl/mudrod/weblog/structure/SessionExtractor.java
new file mode 100644
index 0000000..edba32e
--- /dev/null
+++ 
b/core/src/main/java/gov/nasa/jpl/mudrod/weblog/structure/SessionExtractor.java
@@ -0,0 +1,532 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License"); you
+ * may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package gov.nasa.jpl.mudrod.weblog.structure;
+
+import gov.nasa.jpl.mudrod.driver.ESDriver;
+import gov.nasa.jpl.mudrod.driver.SparkDriver;
+import gov.nasa.jpl.mudrod.main.MudrodConstants;
+
+import org.apache.spark.api.java.JavaPairRDD;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.api.java.Optional;
+import org.apache.spark.api.java.function.FlatMapFunction;
+import org.apache.spark.api.java.function.Function;
+import org.apache.spark.api.java.function.Function2;
+import org.apache.spark.api.java.function.PairFunction;
+import org.elasticsearch.action.search.SearchResponse;
+import org.elasticsearch.common.unit.TimeValue;
+import org.elasticsearch.index.query.QueryBuilders;
+import org.elasticsearch.search.SearchHit;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import scala.Tuple2;
+
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+
+/**
+ * ClassName: SessionExtractor Function: Extract sessions details from
+ * reconstructed sessions.
+ */
+public class SessionExtractor implements Serializable {
+
+  private static final Logger LOG = 
LoggerFactory.getLogger(SessionExtractor.class);
+
+  /**
+   *
+   */
+  private static final long serialVersionUID = 1L;
+
+  public SessionExtractor() {
+    // default constructor
+  }
+
+  /**
+   * extractClickStreamFromES:Extract click streams from logs stored in
+   * Elasticsearch
+   *
+   * @param props
+   *          the Mudrod configuration
+   * @param es
+   *          the Elasticsearch drive
+   * @param spark
+   *          the spark driver
+   * @return clickstream list in JavaRDD format {@link ClickStream}
+   */
+  public JavaRDD<ClickStream> extractClickStreamFromES(Properties props, 
ESDriver es, SparkDriver spark) {
+    switch (props.getProperty(MudrodConstants.PROCESS_TYPE)) {
+      case "sequential":
+        List<ClickStream> queryList = this.getClickStreamList(props, es);
+        return spark.sc.parallelize(queryList);
+      case "parallel":
+        return getClickStreamListInParallel(props, spark, es);
+      default:
+      LOG.error("Error finding processing type for '{}'. Please check your 
config.xml.", props.getProperty(MudrodConstants.PROCESS_TYPE));
+    }
+    return null;
+  }
+
+  /**
+   * getClickStreamList:Extract click streams from logs stored in 
Elasticsearch.
+   *
+   * @param props
+   *          the Mudrod configuration
+   * @param es
+   *          the Elasticsearch driver
+   * @return clickstream list {@link ClickStream}
+   */
+  protected List<ClickStream> getClickStreamList(Properties props, ESDriver 
es) {
+    List<String> logIndexList = 
es.getIndexListWithPrefix(props.getProperty(MudrodConstants.LOG_INDEX));
+
+    List<ClickStream> result = new ArrayList<>();
+    for (int n = 0; n < logIndexList.size(); n++) {
+      String logIndex = logIndexList.get(n);
+      List<String> sessionIdList;
+      try {
+        sessionIdList = this.getSessions(props, es, logIndex);
+        Session session = new Session(props, es);
+        int sessionNum = sessionIdList.size();
+        for (int i = 0; i < sessionNum; i++) {
+          String[] sArr = sessionIdList.get(i).split(",");
+          List<ClickStream> datas = session.getClickStreamList(sArr[1], 
sArr[2], sArr[0]);
+          result.addAll(datas);
+        }
+      } catch (Exception e) {
+        LOG.error("Error during extraction of Clickstreams from log index. 
{}", e);
+      }
+    }
+
+    return result;
+  }
+
+  protected JavaRDD<ClickStream> getClickStreamListInParallel(Properties 
props, SparkDriver spark, ESDriver es) {
+
+    List<String> logIndexList = 
es.getIndexListWithPrefix(props.getProperty(MudrodConstants.LOG_INDEX));
+
+    LOG.info("Retrieved {}", logIndexList.toString());
+
+    List<String> sessionIdList = new ArrayList<>();
+    for (int n = 0; n < logIndexList.size(); n++) {
+      String logIndex = logIndexList.get(n);
+      List<String> tmpsessionList = this.getSessions(props, es, logIndex);
+      sessionIdList.addAll(tmpsessionList);
+    }
+
+    JavaRDD<String> sessionRDD = spark.sc.parallelize(sessionIdList, 16);
+
+    JavaRDD<ClickStream> clickStreamRDD = sessionRDD.mapPartitions(new 
FlatMapFunction<Iterator<String>, ClickStream>() {
+      /**
+       *
+       */
+      private static final long serialVersionUID = 1L;
+
+      @Override
+      public Iterator<ClickStream> call(Iterator<String> arg0) throws 
Exception {
+        ESDriver tmpES = new ESDriver(props);
+        tmpES.createBulkProcessor();
+
+        Session session = new Session(props, tmpES);
+        List<ClickStream> clickstreams = new ArrayList<>();
+        while (arg0.hasNext()) {
+          String s = arg0.next();
+          String[] sArr = s.split(",");
+          List<ClickStream> clicks = session.getClickStreamList(sArr[1], 
sArr[2], sArr[0]);
+          clickstreams.addAll(clicks);
+        }
+        tmpES.destroyBulkProcessor();
+        tmpES.close();
+        return clickstreams.iterator();
+      }
+    });
+
+    LOG.info("Clickstream number: {}", clickStreamRDD.count());
+
+    return clickStreamRDD;
+  }
+
+  // This function is reserved and not being used for now
+
+  /**
+   * loadClickStremFromTxt:Load click stream form txt file
+   *
+   * @param clickthroughFile
+   *          txt file
+   * @param sc
+   *          the spark context
+   * @return clickstream list in JavaRDD format {@link ClickStream}
+   */
+  public JavaRDD<ClickStream> loadClickStremFromTxt(String clickthroughFile, 
JavaSparkContext sc) {
+    return sc.textFile(clickthroughFile).flatMap(new FlatMapFunction<String, 
ClickStream>() {
+      /**
+       *
+       */
+      private static final long serialVersionUID = 1L;
+
+      @SuppressWarnings("unchecked")
+      @Override
+      public Iterator<ClickStream> call(String line) throws Exception {
+        List<ClickStream> clickthroughs = (List<ClickStream>) 
ClickStream.parseFromTextLine(line);
+        return (Iterator<ClickStream>) clickthroughs;
+      }
+    });
+  }
+
+  /**
+   * bulidDataQueryRDD: convert click stream list to data set queries pairs.
+   *
+   * @param clickstreamRDD:
+   *          click stream data
+   * @param downloadWeight:
+   *          weight of download behavior
+   * @return JavaPairRDD, key is short name of data set, and values are queries
+   */
+  public JavaPairRDD<String, List<String>> 
bulidDataQueryRDD(JavaRDD<ClickStream> clickstreamRDD, int downloadWeight) {
+    return clickstreamRDD.mapToPair(new PairFunction<ClickStream, String, 
List<String>>() {
+      /**
+       *
+       */
+      private static final long serialVersionUID = 1L;
+
+      @Override
+      public Tuple2<String, List<String>> call(ClickStream click) throws 
Exception {
+        List<String> query = new ArrayList<>();
+        // important! download behavior is given higher weights
+        // than viewing
+        // behavior
+        boolean download = click.isDownload();
+        int weight = 1;
+        if (download) {
+          weight = downloadWeight;
+        }
+        for (int i = 0; i < weight; i++) {
+          query.add(click.getKeyWords());
+        }
+
+        return new Tuple2<>(click.getViewDataset(), query);
+      }
+    }).reduceByKey(new Function2<List<String>, List<String>, List<String>>() {
+      /**
+       *
+       */
+      private static final long serialVersionUID = 1L;
+
+      @Override
+      public List<String> call(List<String> v1, List<String> v2) throws 
Exception {
+        List<String> list = new ArrayList<>();
+        list.addAll(v1);
+        list.addAll(v2);
+        return list;
+      }
+    });
+  }
+
+  /**
+   * getSessions: Get sessions from logs
+   *
+   * @param props
+   *          the Mudrod configuration
+   * @param es
+   *          the Elasticsearch driver
+   * @param logIndex
+   *          a log index name
+   * @return list of session names
+   */
+  protected List<String> getSessions(Properties props, ESDriver es, String 
logIndex) {
+
+    String cleanupPrefix = 
props.getProperty(MudrodConstants.CLEANUP_TYPE_PREFIX);
+    String sessionStatPrefix = 
props.getProperty(MudrodConstants.SESSION_STATS_PREFIX);
+
+    List<String> sessions = new ArrayList<>();
+    SearchResponse scrollResp = 
es.getClient().prepareSearch(logIndex).setTypes(sessionStatPrefix).setScroll(new
 
TimeValue(60000)).setQuery(QueryBuilders.matchAllQuery()).setSize(100).execute()
+            .actionGet();
+    while (true) {
+      for (SearchHit hit : scrollResp.getHits().getHits()) {
+        Map<String, Object> session = hit.getSource();
+        String sessionID = (String) session.get("SessionID");
+        sessions.add(sessionID + "," + logIndex + "," + cleanupPrefix);
+      }
+
+      scrollResp = 
es.getClient().prepareSearchScroll(scrollResp.getScrollId()).setScroll(new 
TimeValue(600000)).execute().actionGet();
+      if (scrollResp.getHits().getHits().length == 0) {
+        break;
+      }
+    }
+
+    return sessions;
+  }
+
+  public JavaPairRDD<String, Double> bulidUserItermRDD(JavaRDD<ClickStream> 
clickstreamRDD) {
+    return clickstreamRDD.mapToPair(new PairFunction<ClickStream, String, 
Double>() {
+      /**
+       *
+       */
+      private static final long serialVersionUID = 1L;
+
+      @Override
+      public Tuple2<String, Double> call(ClickStream click) throws Exception {
+        double rate = 1;
+        boolean download = click.isDownload();
+        if (download) {
+          rate = 2;
+        }
+
+        String sessionID = click.getSessionID();
+        String user = sessionID.split("@")[0];
+
+        return new Tuple2<>(user + "," + click.getViewDataset(), rate);
+      }
+    }).reduceByKey(new Function2<Double, Double, Double>() {
+      /**
+       *
+       */
+      private static final long serialVersionUID = 1L;
+
+      @Override
+      public Double call(Double v1, Double v2) throws Exception {
+        return v1 >= v2 ? v1 : v2;
+
+      }
+    });
+  }
+
+  public JavaPairRDD<String, Double> bulidSessionItermRDD(JavaRDD<ClickStream> 
clickstreamRDD) {
+    JavaPairRDD<String, String> sessionItemRDD = clickstreamRDD.mapToPair(new 
PairFunction<ClickStream, String, String>() {
+      /**
+       *
+       */
+      private static final long serialVersionUID = 1L;
+
+      @Override
+      public Tuple2<String, String> call(ClickStream click) throws Exception {
+
+        String sessionID = click.getSessionID();
+        return new Tuple2<>(sessionID, click.getViewDataset());
+      }
+    }).distinct();
+
+    // remove some sessions
+    JavaPairRDD<String, Double> sessionItemNumRDD = 
sessionItemRDD.keys().mapToPair(new PairFunction<String, String, Double>() {
+      /**
+       *
+       */
+      private static final long serialVersionUID = 1L;
+
+      @Override
+      public Tuple2<String, Double> call(String item) throws Exception {
+        return new Tuple2<>(item, 1.0);
+      }
+    }).reduceByKey(new Function2<Double, Double, Double>() {
+      /**
+       *
+       */
+      private static final long serialVersionUID = 1L;
+
+      @Override
+      public Double call(Double v1, Double v2) throws Exception {
+        return v1 + v2;
+      }
+    }).filter(new Function<Tuple2<String, Double>, Boolean>() {
+      /**
+       *
+       */
+      private static final long serialVersionUID = 1L;
+
+      @Override
+      public Boolean call(Tuple2<String, Double> arg0) throws Exception {
+        Boolean b = true;
+        if (arg0._2 < 2) {
+          b = false;
+        }
+        return b;
+      }
+    });
+
+    return sessionItemNumRDD.leftOuterJoin(sessionItemRDD).mapToPair(new 
PairFunction<Tuple2<String, Tuple2<Double, Optional<String>>>, String, 
Double>() {
+      /**
+       *
+       */
+      private static final long serialVersionUID = 1L;
+
+      @Override
+      public Tuple2<String, Double> call(Tuple2<String, Tuple2<Double, 
Optional<String>>> arg0) throws Exception {
+
+        Tuple2<Double, Optional<String>> test = arg0._2;
+        Optional<String> optStr = test._2;
+        String item = "";
+        if (optStr.isPresent()) {
+          item = optStr.get();
+        }
+        return new Tuple2<>(arg0._1 + "," + item, 1.0);
+      }
+
+    });
+  }
+
+  public JavaPairRDD<String, List<String>> bulidSessionDatasetRDD(Properties 
props, ESDriver es, SparkDriver spark) {
+
+    List<String> result = new ArrayList<>();
+    List<String> logIndexList = 
es.getIndexListWithPrefix(props.getProperty(MudrodConstants.LOG_INDEX));
+    for (int n = 0; n < logIndexList.size(); n++) {
+      String logIndex = logIndexList.get(n);
+      SearchResponse scrollResp = 
es.getClient().prepareSearch(logIndex).setTypes(props.getProperty(MudrodConstants.SESSION_STATS_PREFIX)).setScroll(new
 TimeValue(60000)).setQuery(QueryBuilders.matchAllQuery())
+              .setSize(100).execute().actionGet();
+      while (true) {
+        for (SearchHit hit : scrollResp.getHits().getHits()) {
+          Map<String, Object> session = hit.getSource();
+          String sessionID = (String) session.get("SessionID");
+          String views = (String) session.get("views");
+          if (views != null && !"".equals(views)) {
+            String sessionItems = sessionID + ":" + views;
+            result.add(sessionItems);
+          }
+        }
+
+        scrollResp = 
es.getClient().prepareSearchScroll(scrollResp.getScrollId()).setScroll(new 
TimeValue(600000)).execute().actionGet();
+        if (scrollResp.getHits().getHits().length == 0) {
+          break;
+        }
+      }
+    }
+
+    JavaRDD<String> sessionRDD = spark.sc.parallelize(result);
+
+    return sessionRDD.mapToPair(new PairFunction<String, String, 
List<String>>() {
+      private static final long serialVersionUID = 1L;
+
+      @Override
+      public Tuple2<String, List<String>> call(String sessionitem) throws 
Exception {
+        String[] splits = sessionitem.split(":");
+        String sessionId = splits[0];
+        List<String> itemList = new ArrayList<>();
+
+        String items = splits[1];
+        String[] itemArr = items.split(",");
+        int size = itemArr.length;
+        for (int i = 0; i < size; i++) {
+          String item = itemArr[i];
+          if (!itemList.contains(item))
+            itemList.add(itemArr[i]);
+        }
+
+        return new Tuple2<>(sessionId, itemList);
+      }
+    });
+  }
+
+  /**
+   * extractClickStreamFromES:Extract click streams from logs stored in
+   * Elasticsearch
+   *
+   * @param props
+   *          the Mudrod configuration
+   * @param es
+   *          the Elasticsearch drive
+   * @param spark
+   *          the spark driver
+   * @return clickstream list in JavaRDD format {@link ClickStream}
+   */
+  public JavaRDD<RankingTrainData> extractRankingTrainData(Properties props, 
ESDriver es, SparkDriver spark) {
+
+    List<RankingTrainData> queryList = this.extractRankingTrainData(props, es);
+    return spark.sc.parallelize(queryList);
+
+  }
+
+  /**
+   * getClickStreamList:Extract click streams from logs stored in 
Elasticsearch.
+   *
+   * @param props
+   *          the Mudrod configuration
+   * @param es
+   *          the Elasticsearch driver
+   * @return clickstream list {@link ClickStream}
+   */
+  protected List<RankingTrainData> extractRankingTrainData(Properties props, 
ESDriver es) {
+    List<String> logIndexList = 
es.getIndexListWithPrefix(props.getProperty(MudrodConstants.LOG_INDEX));
+
+    LOG.info(logIndexList.toString());
+
+    List<RankingTrainData> result = new ArrayList<>();
+    for (int n = 0; n < logIndexList.size(); n++) {
+      String logIndex = logIndexList.get(n);
+      List<String> sessionIdList;
+      try {
+        sessionIdList = this.getSessions(props, es, logIndex);
+        Session session = new Session(props, es);
+        int sessionNum = sessionIdList.size();
+        for (int i = 0; i < sessionNum; i++) {
+          String[] sArr = sessionIdList.get(i).split(",");
+          List<RankingTrainData> datas = session.getRankingTrainData(sArr[1], 
sArr[2], sArr[0]);
+          result.addAll(datas);
+        }
+      } catch (Exception e) {
+        LOG.error("Error which extracting ranking train data: {}", e);
+      }
+    }
+
+    return result;
+  }
+
+  protected JavaRDD<RankingTrainData> 
extractRankingTrainDataInParallel(Properties props, SparkDriver spark, ESDriver 
es) {
+
+    List<String> logIndexList = 
es.getIndexListWithPrefix(props.getProperty(MudrodConstants.LOG_INDEX));
+
+    LOG.info(logIndexList.toString());
+
+    List<String> sessionIdList = new ArrayList<>();
+    for (int n = 0; n < logIndexList.size(); n++) {
+      String logIndex = logIndexList.get(n);
+      List<String> tmpsessionList = this.getSessions(props, es, logIndex);
+      sessionIdList.addAll(tmpsessionList);
+    }
+
+    JavaRDD<String> sessionRDD = spark.sc.parallelize(sessionIdList, 16);
+
+    JavaRDD<RankingTrainData> clickStreamRDD = sessionRDD.mapPartitions(new 
FlatMapFunction<Iterator<String>, RankingTrainData>() {
+      /**
+       *
+       */
+      private static final long serialVersionUID = 1L;
+
+      @Override
+      public Iterator<RankingTrainData> call(Iterator<String> arg0) throws 
Exception {
+        ESDriver tmpES = new ESDriver(props);
+        tmpES.createBulkProcessor();
+
+        Session session = new Session(props, tmpES);
+        List<RankingTrainData> clickstreams = new ArrayList<>();
+        while (arg0.hasNext()) {
+          String s = arg0.next();
+          String[] sArr = s.split(",");
+          List<RankingTrainData> clicks = session.getRankingTrainData(sArr[1], 
sArr[2], sArr[0]);
+          clickstreams.addAll(clicks);
+        }
+        tmpES.destroyBulkProcessor();
+        tmpES.close();
+        return clickstreams.iterator();
+      }
+    });
+
+    LOG.info("Clickstream number: {}", clickStreamRDD.count());
+
+    return clickStreamRDD;
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-sdap-mudrod/blob/7b76fa16/core/src/main/java/gov/nasa/jpl/mudrod/weblog/structure/SessionNode.java
----------------------------------------------------------------------
diff --git 
a/core/src/main/java/gov/nasa/jpl/mudrod/weblog/structure/SessionNode.java 
b/core/src/main/java/gov/nasa/jpl/mudrod/weblog/structure/SessionNode.java
new file mode 100644
index 0000000..958e184
--- /dev/null
+++ b/core/src/main/java/gov/nasa/jpl/mudrod/weblog/structure/SessionNode.java
@@ -0,0 +1,344 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License"); you
+ * may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package gov.nasa.jpl.mudrod.weblog.structure;
+
+import java.io.UnsupportedEncodingException;
+import java.net.URLDecoder;
+import java.util.*;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * ClassName: SessionNode Function: Functions related to a node in a session
+ * tree sturcture.
+ */
+public class SessionNode {
+  // id: Node ID
+  protected String id;
+  // value: Node value
+  protected String value;
+  // parent: Parent node of this node
+  protected SessionNode parent;
+  // children: Child nodes of this node
+  protected List<SessionNode> children = new ArrayList<>();
+  // time: request time of node
+  protected String time;
+  // request: request url of this node
+  protected String request;
+  // referer: previous request url of this node
+  protected String referer;
+  // seq: sequence of this node
+  protected int seq;
+  // key: type of this node extracted from url, including three types -
+  // dataset,datasetlist,ftp
+  protected String key;
+  // logType: log types of this node, including two types - po.dacc, ftp
+  protected String logType;
+  // search: query extracted from this node
+  protected String search;
+  // filter: filter facets extracted from this node
+  protected Map<String, String> filter;
+  // datasetId: viewed/downloaded data set ID
+  protected String datasetId;
+
+  public SessionNode() {
+
+  }
+
+  /**
+   * Creates a new instance of SessionNode.
+   *
+   * @param request: request url
+   * @param logType: including two types - po.dacc, ftp
+   * @param referer: previous request url
+   * @param time:    request time of node
+   * @param seq:     sequence of this node
+   */
+  public SessionNode(String request, String logType, String referer, String 
time, int seq) {
+    this.logType = logType;
+    this.time = time;
+    this.seq = seq;
+    this.setRequest(request);
+    this.setReferer(referer);
+    this.setKey(request, logType);
+  }
+
+  /**
+   * setReferer: Set previous request url of this node
+   *
+   * @param referer previous request url
+   */
+  public void setReferer(String referer) {
+    if (referer == null) {
+      this.referer = "";
+      return;
+    }
+    this.referer = referer.toLowerCase().replace("http://podaac.jpl.nasa.gov";, 
"");
+  }
+
+  /**
+   * setRequest: Set request url of this node
+   *
+   * @param req request url
+   */
+  public void setRequest(String req) {
+    this.request = req;
+    if (this.logType.equals("PO.DAAC")) {
+      this.parseRequest(req);
+    }
+  }
+
+  /**
+   * getChildren:Get child nodes of this node
+   *
+   * @return child nodes
+   */
+  public List<SessionNode> getChildren() {
+    return this.children;
+  }
+
+  /**
+   * setChildren: Set child nodes of this node
+   *
+   * @param children child nodes of this node
+   */
+  public void setChildren(List<SessionNode> children) {
+    this.children = children;
+  }
+
+  /**
+   * addChildren: Add a children node
+   *
+   * @param node session node
+   */
+  public void addChildren(SessionNode node) {
+    this.children.add(node);
+  }
+
+  /**
+   * getId:Get node ID
+   *
+   * @return node ID of this node
+   */
+  public String getId() {
+    return this.id;
+  }
+
+  /**
+   * bSame:Compare this node with another node
+   *
+   * @param node {@link SessionNode}
+   * @return boolean value, true mean the two nodes are same
+   */
+  public Boolean bSame(SessionNode node) {
+    Boolean bsame = false;
+    if (this.request.equals(node.request)) {
+      bsame = true;
+    }
+    return bsame;
+  }
+
+  /**
+   * setKey:Set request type which contains three categories -
+   * dataset,datasetlist,ftp
+   *
+   * @param request request url
+   * @param logType url type
+   */
+  public void setKey(String request, String logType) {
+    this.key = "";
+    String datasetlist = "/datasetlist?";
+    String dataset = "/dataset/";
+    if (logType.equals("ftp")) {
+      this.key = "ftp";
+    } else if (logType.equals("root")) {
+      this.key = "root";
+    } else {
+      if (request.contains(datasetlist)) {
+        this.key = "datasetlist";
+      } else if (request.contains(dataset) /* || request.contains(granule) */) 
{
+        this.key = "dataset";
+      }
+    }
+  }
+
+  /**
+   * getKey:Get request type which contains three categories -
+   * dataset,datasetlist,ftp
+   *
+   * @return request url type of this node
+   */
+  public String getKey() {
+    return this.key;
+  }
+
+  /**
+   * getRequest:Get node request
+   *
+   * @return request url of this node
+   */
+  public String getRequest() {
+    return this.request;
+  }
+
+  /**
+   * getReferer:Get previous request url of this node
+   *
+   * @return previous request url of this node
+   */
+  public String getReferer() {
+    return this.referer;
+  }
+
+  /**
+   * getParent:Get parent node of this node
+   *
+   * @return parent node of this node
+   */
+  public SessionNode getParent() {
+    return this.parent;
+  }
+
+  /**
+   * setParent: Set parent node of this node
+   *
+   * @param parent the previous request node of this node
+   */
+  public void setParent(SessionNode parent) {
+    this.parent = parent;
+  }
+
+  /**
+   * getSearch:Get query of this node
+   *
+   * @return search query of this node
+   */
+  public String getSearch() {
+    return this.search;
+  }
+
+  /**
+   * getFilter:Get filter facets of this node
+   *
+   * @return filter values of this node
+   */
+  public Map<String, String> getFilter() {
+    return this.filter;
+  }
+
+  /**
+   * getDatasetId:Get data set ID of this node
+   *
+   * @return viewing/downloading data set of this node
+   */
+  public String getDatasetId() {
+    return this.datasetId;
+  }
+
+  /**
+   * getSeq:Get sequence of this node
+   *
+   * @return request sequence of this node
+   */
+  public int getSeq() {
+    return this.seq;
+  }
+
+  /**
+   * getFilterStr:Get filter facets of this node
+   *
+   * @return filters values of this node
+   */
+  public String getFilterStr() {
+    String filter = "";
+    if (this.filter.size() > 0) {
+      Iterator iter = this.filter.keySet().iterator();
+      while (iter.hasNext()) {
+        String key = (String) iter.next();
+        String val = this.filter.get(key);
+        filter += key + "=" + val + ",";
+      }
+
+      filter = filter.substring(0, filter.length() - 1);
+    }
+
+    return filter;
+  }
+
+  /**
+   * parseRequest:Parse request to extract request type
+   *
+   * @param request request url of this node
+   */
+  public void parseRequest(String request) {
+    Pattern pattern = Pattern.compile("get (.*?) http/*");
+    Matcher matcher = pattern.matcher(request.trim().toLowerCase());
+    while (matcher.find()) {
+      request = matcher.group(1);
+    }
+    if (request.contains("/dataset/")) {
+      this.parseDatasetId(request);
+    }
+
+    this.request = request.toLowerCase();
+  }
+
+  /**
+   * parseFilterParams:Parse filter facets information
+   *
+   * @param params filter key value pairs of this node
+   */
+  private void parseFilterParams(Map<String, String> params) {
+    this.filter = new HashMap<String, String>();
+    if (params.containsKey("ids")) {
+      String idsStr = params.get("ids");
+      if (!idsStr.equals("")) {
+        idsStr = URLDecoder.decode(idsStr);
+        String[] ids = idsStr.split(":");
+        String valueStr = params.get("values");
+        if (valueStr != null) {
+          valueStr = URLDecoder.decode(valueStr);
+          String[] values = valueStr.split(":");
+          int size = ids.length;
+          for (int i = 0; i < size; i++) {
+            this.filter.put(ids[i], values[i]);
+          }
+        }
+      }
+    }
+
+    if (!this.search.equals("")) {
+      this.filter.put("search", this.search);
+    }
+  }
+
+  /**
+   * parseDatasetId:Parse Request to extract data set ID
+   *
+   * @param request request url
+   */
+  public void parseDatasetId(String request) {
+    try {
+      request = URLDecoder.decode(request, "UTF-8");
+    } catch (UnsupportedEncodingException e) {
+      e.printStackTrace();
+    }
+    String[] twoparts = request.split("[?]");
+    String[] parts = twoparts[0].split("/");
+    if (parts.length <= 2) {
+      return;
+    }
+    this.datasetId = parts[2];
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-sdap-mudrod/blob/7b76fa16/core/src/main/java/gov/nasa/jpl/mudrod/weblog/structure/SessionTree.java
----------------------------------------------------------------------
diff --git 
a/core/src/main/java/gov/nasa/jpl/mudrod/weblog/structure/SessionTree.java 
b/core/src/main/java/gov/nasa/jpl/mudrod/weblog/structure/SessionTree.java
new file mode 100644
index 0000000..46c8d0c
--- /dev/null
+++ b/core/src/main/java/gov/nasa/jpl/mudrod/weblog/structure/SessionTree.java
@@ -0,0 +1,521 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License"); you
+ * may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package gov.nasa.jpl.mudrod.weblog.structure;
+
+import com.google.gson.Gson;
+import com.google.gson.JsonElement;
+import com.google.gson.JsonObject;
+import gov.nasa.jpl.mudrod.discoveryengine.MudrodAbstract;
+import gov.nasa.jpl.mudrod.driver.ESDriver;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.UnsupportedEncodingException;
+import java.util.*;
+import java.util.concurrent.ExecutionException;
+
+/**
+ * ClassName: SessionTree Function: Convert request list in a session to a tree
+ */
+public class SessionTree extends MudrodAbstract {
+
+  /**
+   *
+   */
+  private static final long serialVersionUID = 1L;
+  private static final Logger LOG = LoggerFactory.getLogger(SessionTree.class);
+  // size: node numbers in the session tree
+  public int size = 0;
+  // root: root node of session tree
+  protected SessionNode root = null;
+  // binsert: indicates inserting a node or not
+  public boolean binsert = false;
+  // tmpnode: tempt node
+  public SessionNode tmpnode;
+  // latestDatasetnode: the latest inserted node whose key is "dataset"
+  public SessionNode latestDatasetnode;
+  // sessionID: session ID
+  private String sessionID;
+  // cleanupType: session type in Elasticsearch
+  private String cleanupType;
+
+  /**
+   * Creates a new instance of SessionTree.
+   *
+   * @param props:       the Mudrod configuration
+   * @param es:          the Elasticsearch drive
+   * @param rootData:    root node of the tree
+   * @param sessionID:   session ID
+   * @param cleanupType: session type
+   */
+  public SessionTree(Properties props, ESDriver es, SessionNode rootData, 
String sessionID, String cleanupType) {
+    super(props, es, null);
+    root = new SessionNode("root", "root", "", "", 0);
+    tmpnode = root;
+    this.sessionID = sessionID;
+    this.cleanupType = cleanupType;
+  }
+
+  /**
+   * Creates a new instance of SessionTree.
+   *
+   * @param props:       the Mudrod configuration
+   * @param es:          the Elasticsearch drive
+   * @param sessionID:   session ID
+   * @param cleanupType: session type
+   */
+  public SessionTree(Properties props, ESDriver es, String sessionID, String 
cleanupType) {
+    super(props, es, null);
+    root = new SessionNode("root", "root", "", "", 0);
+    root.setParent(root);
+    tmpnode = root;
+    this.sessionID = sessionID;
+    this.cleanupType = cleanupType;
+  }
+
+  /**
+   * insert: insert a node into the session tree.
+   *
+   * @param node {@link SessionNode}
+   * @return session node
+   */
+  public SessionNode insert(SessionNode node) {
+    // begin with datasetlist
+    if (node.getKey().equals("datasetlist")) {
+      this.binsert = true;
+    }
+    if (!this.binsert) {
+      return null;
+    }
+    // remove unrelated node
+    if (!node.getKey().equals("datasetlist") && 
!node.getKey().equals("dataset") && !node.getKey().equals("ftp")) {
+      return null;
+    }
+    // remove dumplicated click
+    if (node.getRequest().equals(tmpnode.getRequest())) {
+      return null;
+    }
+    // search insert node
+    SessionNode parentnode = this.searchParentNode(node);
+    if (parentnode == null) {
+      return null;
+    }
+    node.setParent(parentnode);
+    parentnode.addChildren(node);
+
+    // record insert node
+    tmpnode = node;
+    if ("dataset".equals(node.getKey())) {
+      latestDatasetnode = node;
+    }
+
+    size++;
+    return node;
+  }
+
+  /**
+   * printTree: Print session tree
+   *
+   * @param node root node of the session tree
+   */
+  public void printTree(SessionNode node) {
+    LOG.info("node: {} \n", node.getRequest());
+    if (node.children.isEmpty()) {
+      for (int i = 0; i < node.children.size(); i++) {
+        printTree(node.children.get(i));
+      }
+    }
+  }
+
+  /**
+   * TreeToJson: Convert the session tree to Json object
+   *
+   * @param node node of the session tree
+   * @return tree content in Json format
+   */
+  public JsonObject treeToJson(SessionNode node) {
+    Gson gson = new Gson();
+    JsonObject json = new JsonObject();
+
+    json.addProperty("seq", node.getSeq());
+    if ("datasetlist".equals(node.getKey())) {
+      json.addProperty("icon", "./resources/images/searching.png");
+      json.addProperty("name", node.getRequest());
+    } else if ("dataset".equals(node.getKey())) {
+      json.addProperty("icon", "./resources/images/viewing.png");
+      json.addProperty("name", node.getDatasetId());
+    } else if ("ftp".equals(node.getKey())) {
+      json.addProperty("icon", "./resources/images/downloading.png");
+      json.addProperty("name", node.getRequest());
+    } else if ("root".equals(node.getKey())) {
+      json.addProperty("name", "");
+      json.addProperty("icon", "./resources/images/users.png");
+    }
+
+    if (!node.children.isEmpty()) {
+      List<JsonObject> jsonChildren = new ArrayList<>();
+      for (int i = 0; i < node.children.size(); i++) {
+        JsonObject jsonChild = treeToJson(node.children.get(i));
+        jsonChildren.add(jsonChild);
+      }
+      JsonElement jsonElement = gson.toJsonTree(jsonChildren);
+      json.add("children", jsonElement);
+    }
+
+    return json;
+  }
+
+  /**
+   * getClickStreamList: Get click stream list in the session
+   *
+   * @return {@link ClickStream}
+   */
+  public List<ClickStream> getClickStreamList() {
+
+    List<ClickStream> clickthroughs = new ArrayList<>();
+    List<SessionNode> viewnodes = this.getViewNodes(this.root);
+    for (int i = 0; i < viewnodes.size(); i++) {
+
+      SessionNode viewnode = viewnodes.get(i);
+      SessionNode parent = viewnode.getParent();
+      List<SessionNode> children = viewnode.getChildren();
+
+      if (!"datasetlist".equals(parent.getKey())) {
+        continue;
+      }
+
+      RequestUrl requestURL = new RequestUrl();
+      String viewquery = "";
+      try {
+        String infoStr = requestURL.getSearchInfo(viewnode.getRequest());
+        viewquery = es.customAnalyzing(props.getProperty("indexName"), 
infoStr);
+      } catch (UnsupportedEncodingException | InterruptedException | 
ExecutionException e) {
+        LOG.warn("Exception getting search info. Ignoring...", e);
+      }
+
+      String dataset = viewnode.getDatasetId();
+      boolean download = false;
+      for (int j = 0; j < children.size(); j++) {
+        SessionNode child = children.get(j);
+        if ("ftp".equals(child.getKey())) {
+          download = true;
+          break;
+        }
+      }
+
+      if (viewquery != null && !"".equals(viewquery)) {
+        String[] queries = viewquery.trim().split(",");
+        if (queries.length > 0) {
+          for (int k = 0; k < queries.length; k++) {
+            ClickStream data = new ClickStream(queries[k], dataset, download);
+            data.setSessionId(this.sessionID);
+            data.setType(this.cleanupType);
+            clickthroughs.add(data);
+          }
+        }
+      }
+    }
+
+    return clickthroughs;
+  }
+
+  /**
+   * searchParentNode:Get parent node of a session node
+   *
+   * @param node {@link SessionNode}
+   * @return node {@link SessionNode}
+   */
+  private SessionNode searchParentNode(SessionNode node) {
+
+    String nodeKey = node.getKey();
+
+    if ("datasetlist".equals(nodeKey)) {
+      if ("-".equals(node.getReferer())) {
+        return root;
+      } else {
+        SessionNode tmp = this.findLatestRefer(tmpnode, node.getReferer());
+        if (tmp == null) {
+          return root;
+        } else {
+          return tmp;
+        }
+      }
+    } else if ("dataset".equals(nodeKey)) {
+      if ("-".equals(node.getReferer())) {
+        return null;
+      } else {
+        return this.findLatestRefer(tmpnode, node.getReferer());
+      }
+    } else if ("ftp".equals(nodeKey)) {
+      return latestDatasetnode;
+    }
+
+    return tmpnode;
+  }
+
+  /**
+   * findLatestRefer: Find parent node whose visiting url is equal to the refer
+   * url of a session node
+   *
+   * @param node:  {@link SessionNode}
+   * @param refer: request url
+   * @return
+   */
+  private SessionNode findLatestRefer(SessionNode node, String refer) {
+    while (true) {
+      if ("root".equals(node.getKey())) {
+        return null;
+      }
+      SessionNode parentNode = node.getParent();
+      if (refer.equals(parentNode.getRequest())) {
+        return parentNode;
+      }
+
+      SessionNode tmp = this.iterChild(parentNode, refer);
+      if (tmp == null) {
+        node = parentNode;
+        continue;
+      } else {
+        return tmp;
+      }
+    }
+  }
+
+  /**
+   * iterChild:
+   *
+   * @param start
+   * @param refer
+   * @return
+   */
+  private SessionNode iterChild(SessionNode start, String refer) {
+    List<SessionNode> children = start.getChildren();
+    for (int i = children.size() - 1; i >= 0; i--) {
+      SessionNode tmp = children.get(i);
+      if (tmp.getChildren().isEmpty()) {
+        if (refer.equals(tmp.getRequest())) {
+          return tmp;
+        } else {
+          continue;
+        }
+      } else {
+        iterChild(tmp, refer);
+      }
+    }
+
+    return null;
+  }
+
+  /**
+   * check:
+   *
+   * @param children
+   * @param str
+   * @return
+   */
+  private boolean check(List<SessionNode> children, String str) {
+    for (int i = 0; i < children.size(); i++) {
+      if (children.get(i).key.equals(str)) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+  /**
+   * insertHelperChildren:
+   *
+   * @param entry
+   * @param children
+   * @return
+   */
+  private boolean insertHelperChildren(SessionNode entry, List<SessionNode> 
children) {
+    for (int i = 0; i < children.size(); i++) {
+      boolean result = insertHelper(entry, children.get(i));
+      if (result) {
+        return result;
+      }
+    }
+    return false;
+
+  }
+
+  /**
+   * insertHelper:
+   *
+   * @param entry
+   * @param node
+   * @return
+   */
+  private boolean insertHelper(SessionNode entry, SessionNode node) {
+    if ("datasetlist".equals(entry.key) || "dataset".equals(entry.key)) {
+      if ("datasetlist".equals(node.key)) {
+        if (node.children.isEmpty()) {
+          node.children.add(entry);
+          return true;
+        } else {
+          boolean flag = check(node.children, "datasetlist");
+          if (!flag) {
+            node.children.add(entry);
+            return true;
+          } else {
+            insertHelperChildren(entry, node.children);
+          }
+        }
+      } else {
+        insertHelperChildren(entry, node.children);
+      }
+    } else if ("ftp".equals(entry.key)) {
+      if ("dataset".equals(node.key)) {
+        if (node.children.isEmpty()) {
+          node.children.add(entry);
+          return true;
+        } else {
+          boolean flag = check(node.children, "dataset");
+          if (!flag) {
+            node.children.add(entry);
+            return true;
+          } else {
+            insertHelperChildren(entry, node.children);
+          }
+        }
+      } else {
+        insertHelperChildren(entry, node.children);
+      }
+    }
+
+    return false;
+  }
+
+  /**
+   * getViewNodes: Get a session node's child nodes whose key is "dataset".
+   *
+   * @param node
+   * @return a list of session node
+   */
+  private List<SessionNode> getViewNodes(SessionNode node) {
+
+    List<SessionNode> viewnodes = new ArrayList<>();
+    if ("dataset".equals(node.getKey())) {
+      viewnodes.add(node);
+    }
+
+    if (!node.children.isEmpty()) {
+      for (int i = 0; i < node.children.size(); i++) {
+        SessionNode childNode = node.children.get(i);
+        viewnodes.addAll(getViewNodes(childNode));
+      }
+    }
+
+    return viewnodes;
+  }
+
+  private List<SessionNode> getQueryNodes(SessionNode node) {
+    return this.getNodes(node, "datasetlist");
+  }
+
+  private List<SessionNode> getNodes(SessionNode node, String nodeKey) {
+
+    List<SessionNode> nodes = new ArrayList<>();
+    if (node.getKey().equals(nodeKey)) {
+      nodes.add(node);
+    }
+
+    if (!node.children.isEmpty()) {
+      for (int i = 0; i < node.children.size(); i++) {
+        SessionNode childNode = node.children.get(i);
+        nodes.addAll(getNodes(childNode, nodeKey));
+      }
+    }
+
+    return nodes;
+  }
+
+  /**
+   * Obtain the ranking training data.
+   *
+   * @param indexName   the index from whcih to obtain the data
+   * @param sessionID   a valid session identifier
+   * @return {@link ClickStream}
+   * @throws UnsupportedEncodingException if there is an error whilst
+   *                                      processing the ranking training data.
+   */
+  public List<RankingTrainData> getRankingTrainData(String indexName, String 
sessionID) throws UnsupportedEncodingException {
+
+    List<RankingTrainData> trainDatas = new ArrayList<>();
+
+    List<SessionNode> queryNodes = this.getQueryNodes(this.root);
+    for (int i = 0; i < queryNodes.size(); i++) {
+      SessionNode querynode = queryNodes.get(i);
+      List<SessionNode> children = querynode.getChildren();
+
+      LinkedHashMap<String, Boolean> datasetOpt = new LinkedHashMap<>();
+      int ndownload = 0;
+      for (int j = 0; j < children.size(); j++) {
+        SessionNode node = children.get(j);
+        if ("dataset".equals(node.getKey())) {
+          Boolean bDownload = false;
+          List<SessionNode> nodeChildren = node.getChildren();
+          int childSize = nodeChildren.size();
+          for (int k = 0; k < childSize; k++) {
+            if ("ftp".equals(nodeChildren.get(k).getKey())) {
+              bDownload = true;
+              ndownload += 1;
+              break;
+            }
+          }
+          datasetOpt.put(node.datasetId, bDownload);
+        }
+      }
+
+      // method 1: The priority of download data are higher
+      if (datasetOpt.size() > 1 && ndownload > 0) {
+        // query
+        RequestUrl requestURL = new RequestUrl();
+        String queryUrl = querynode.getRequest();
+        String infoStr = requestURL.getSearchInfo(queryUrl);
+        String query = null;
+        try {
+          query = es.customAnalyzing(props.getProperty("indexName"), infoStr);
+        } catch (InterruptedException | ExecutionException e) {
+          throw new RuntimeException("Error performing custom analyzing", e);
+        }
+        Map<String, String> filter = RequestUrl.getFilterInfo(queryUrl);
+
+        for (String datasetA : datasetOpt.keySet()) {
+          Boolean bDownloadA = datasetOpt.get(datasetA);
+          if (bDownloadA) {
+            for (String datasetB : datasetOpt.keySet()) {
+              Boolean bDownloadB = datasetOpt.get(datasetB);
+              if (!bDownloadB) {
+
+                String[] queries = query.split(",");
+                for (int l = 0; l < queries.length; l++) {
+                  RankingTrainData trainData = new 
RankingTrainData(queries[l], datasetA, datasetB);
+
+                  trainData.setSessionId(this.sessionID);
+                  trainData.setIndex(indexName);
+                  trainData.setFilter(filter);
+                  trainDatas.add(trainData);
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+
+    return trainDatas;
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-sdap-mudrod/blob/7b76fa16/core/src/main/java/gov/nasa/jpl/mudrod/weblog/structure/WebLog.java
----------------------------------------------------------------------
diff --git 
a/core/src/main/java/gov/nasa/jpl/mudrod/weblog/structure/WebLog.java 
b/core/src/main/java/gov/nasa/jpl/mudrod/weblog/structure/WebLog.java
new file mode 100644
index 0000000..e3392e4
--- /dev/null
+++ b/core/src/main/java/gov/nasa/jpl/mudrod/weblog/structure/WebLog.java
@@ -0,0 +1,89 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License"); you 
+ * may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package gov.nasa.jpl.mudrod.weblog.structure;
+
+import java.io.Serializable;
+
+/**
+ * This class represents an Apache access log line. See
+ * http://httpd.apache.org/docs/2.2/logs.html for more details.
+ */
+public class WebLog implements Serializable {
+  String LogType;
+  String IP;
+  String Time;
+  String Request;
+  double Bytes;
+
+  public String getLogType() {
+    return this.LogType;
+  }
+
+  public String getIP() {
+    return this.IP;
+  }
+
+  public String getTime() {
+    return this.Time;
+  }
+
+  public String getRequest() {
+    return this.Request;
+  }
+
+  public double getBytes() {
+    return this.Bytes;
+  }
+
+  public WebLog() {
+
+  }
+
+  public static String SwithtoNum(String time) {
+    if (time.contains("Jan")) {
+      time = time.replace("Jan", "1");
+    } else if (time.contains("Feb")) {
+      time = time.replace("Feb", "2");
+    } else if (time.contains("Mar")) {
+      time = time.replace("Mar", "3");
+    } else if (time.contains("Apr")) {
+      time = time.replace("Apr", "4");
+    } else if (time.contains("May")) {
+      time = time.replace("May", "5");
+    } else if (time.contains("Jun")) {
+      time = time.replace("Jun", "6");
+    } else if (time.contains("Jul")) {
+      time = time.replace("Jul", "7");
+    } else if (time.contains("Aug")) {
+      time = time.replace("Aug", "8");
+    } else if (time.contains("Sep")) {
+      time = time.replace("Sep", "9");
+    } else if (time.contains("Oct")) {
+      time = time.replace("Oct", "10");
+    } else if (time.contains("Nov")) {
+      time = time.replace("Nov", "11");
+    } else if (time.contains("Dec")) {
+      time = time.replace("Dec", "12");
+    }
+    return time;
+  }
+
+  public static boolean checknull(String s) {
+    if (s.equals("{}")) {
+      return false;
+    }
+    return true;
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-sdap-mudrod/blob/7b76fa16/core/src/main/java/gov/nasa/jpl/mudrod/weblog/structure/package-info.java
----------------------------------------------------------------------
diff --git 
a/core/src/main/java/gov/nasa/jpl/mudrod/weblog/structure/package-info.java 
b/core/src/main/java/gov/nasa/jpl/mudrod/weblog/structure/package-info.java
new file mode 100644
index 0000000..7aa9898
--- /dev/null
+++ b/core/src/main/java/gov/nasa/jpl/mudrod/weblog/structure/package-info.java
@@ -0,0 +1,17 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License"); you 
+ * may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * This package includes data structure needed for web log analysis
+ */
+package gov.nasa.jpl.mudrod.weblog.structure;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-sdap-mudrod/blob/7b76fa16/core/src/main/resources/config.xml
----------------------------------------------------------------------
diff --git a/core/src/main/resources/config.xml 
b/core/src/main/resources/config.xml
new file mode 100644
index 0000000..62ac7a6
--- /dev/null
+++ b/core/src/main/resources/config.xml
@@ -0,0 +1,129 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- 
+  Licensed under the Apache License, Version 2.0 (the "License"); 
+  you may not use this file except in compliance with the License. 
+  You may obtain  a copy of the License at 
+  
+  http://www.apache.org/licenses/LICENSE-2.0 Unless 
+  
+  required by applicable law or agreed to in writing, software 
+  distributed under the License is distributed on an "AS IS" 
+  BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 
+  express or implied. See the License for the specific language 
+  governing permissions and limitations under the License. 
+-->
+<Config>
+    <para name="Cleanup_type_prefix">cleanupLog</para>
+
+    <para name="clickStreamLinkageType">ClickStreamLinkage</para>
+
+    <para name="clickStreamMatrixType">clickstreamMatrix</para>
+
+    <para name="clickstreamSVDDimension">50</para>
+
+    <para name="clickStream_w">2</para>
+
+    <para name="commentType">comment</para>
+
+    <para name="downloadf">100</para>
+
+    <para name="downloadWeight">3</para>
+
+    <para name="clusterName">MudrodES</para>
+
+    <para name="ES_Transport_TCP_Port">9300</para>
+
+    <para name="ES_unicast_hosts">127.0.0.1</para>
+
+    <para name="ES_HTTP_port">9200</para>
+
+    <para name="indexName">mudrod</para>
+
+    <para name="ftpPrefix">FTP.</para>
+    
+    <para name="FTP_type_prefix">rawftp</para>
+
+    <para name="HTTP_type_prefix">rawhttp</para>
+
+    <para name="httpPrefix">WWW.</para>
+
+    <para name="logIndexName">podaaclog</para>
+
+    <para name="metadataLinkageType">MetadataLinkage</para>
+
+    <para name="metadataSVDDimension">50</para>
+
+    <para name="metadataurl">null</para>
+
+    <para name="metadata_w">1</para>
+
+    <para name="mini_userHistory">5</para>
+
+    <!--
+    The ontology service implementation. Possible values include
+     EsipPortal - EsipPortalOntology
+     EsipCOR - EsipCOROntology
+     Local - gov.nasa.jpl.mudrod.ontology.process.Local
+     -->
+    <para name="mudrod.ontology.implementation">Local</para>
+
+    <para name="ontologyLinkageType">SWEETLinkage</para>
+
+    <para name="ontology_w">2</para>
+
+    <!--
+     Log processing type. Possible values include
+     'sequential' or 'parallel'.
+     -->
+    <para name="processingType">parallel</para>
+
+    <para name="raw_metadataType">RawMetadata</para>
+
+    <para name="searchf">100</para>
+
+    <para name="sendingrate">30</para>
+
+    <para name="SessionPort">8080</para>
+
+    <para name="SessionStats_prefix">sessionstats</para>
+
+    <para name="SessionUrl">/mudrod-service/session.html</para>
+
+    <!-- The name of your application. This will appear in the UI and in log 
data.-->
+    <para name="spark.app.name">MudrodSparkApp</para>
+
+    <!--
+    The default Spark cluster manager to connect to. See the list of allowed 
master URL's.
+    For more information, consult 
http://spark.apache.org/docs/latest/submitting-applications.html#master-urls
+    -->
+    <para name="spark.master">local[4]</para>
+
+    <!-- ${svmSgdModel.value} is resolved at build time. See the property in 
core/pom.xml for the value -->
+    <para name="svmSgdModel">${svmSgdModel.value}.zip</para>
+    
+    <para name="timegap">600</para>
+    
+    <para name="userHistoryLinkageType">UserHistoryLinkage</para>
+    
+    <para name="userHistory_w">2</para>
+
+    <para name="viewf">200</para>
+
+
+
+    <!-- FOLLOWING NEEDS TO BE ADDED TO MudrodConstants.java -->
+    <para name="recom_metadataType">RecomMetadata</para>
+    <!-- recommendation -->
+    <para name="metadataTermTFIDFSimType">MetadataTermTFIDFSim</para>
+    <para name="metadataWordTFIDFSimType">MetadataWordTFIDFSim</para>
+    <para name="metadataCodeSimType">MetadataCodeSim</para>
+    <para name="metadataSessionBasedSimType">MetadataSBSim</para>
+    <para name="metadataTopicSimType">MetadataTBSim</para>
+    <!--
+    Log processing parallel optimization type. Possible values include
+    default - MudrodConstants.PARALLEL_OPTIMIZATION_DEFAULT
+    repartition - MudrodConstants.PARALLEL_OPTIMIZATION_REPARTITION
+    -->
+    <para name="parallelOptimization">repartition</para>
+
+</Config>

http://git-wip-us.apache.org/repos/asf/incubator-sdap-mudrod/blob/7b76fa16/core/src/main/resources/elastic_mappings.json
----------------------------------------------------------------------
diff --git a/core/src/main/resources/elastic_mappings.json 
b/core/src/main/resources/elastic_mappings.json
new file mode 100644
index 0000000..685f49e
--- /dev/null
+++ b/core/src/main/resources/elastic_mappings.json
@@ -0,0 +1,68 @@
+{
+  "_default_": {
+    "properties": {
+      "keywords": {
+        "type": "text",
+        "analyzer": "csv",
+        "fielddata": true
+      },
+      "views": {
+        "type": "string",
+        "analyzer": "csv"
+      },
+      "downloads": {
+        "type": "string",
+        "analyzer": "csv"
+      },
+      "RequestUrl": {
+        "type": "string",
+        "include_in_all": false,
+        "index": "no"
+      },
+      "IP": {
+        "type": "keyword",
+        "index": "not_analyzed"
+      },
+      "Browser": {
+        "type": "string",
+        "include_in_all": false,
+        "index": "no"
+      },
+      "SessionURL": {
+        "type": "string",
+        "include_in_all": false,
+        "index": "no"
+      },
+      "Referer": {
+        "type": "string",
+        "index": "not_analyzed"
+      },
+      "SessionID": {
+        "type": "string",
+        "index": "not_analyzed"
+      },
+      "Response": {
+        "type": "string",
+        "include_in_all": false,
+        "index": "no"
+      },
+      "Request": {
+        "type": "string",
+        "include_in_all": false,
+        "index": "no"
+      },
+      "Coordinates": {
+        "type": "geo_point",
+        "include_in_all": false,
+        "index": "no"
+      },
+      "LogType": {
+        "type": "string",
+        "index": "not_analyzed"
+      },
+      "Dataset-Metadata": {
+        "type": "completion"
+      }
+    }
+  }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-sdap-mudrod/blob/7b76fa16/core/src/main/resources/elastic_settings.json
----------------------------------------------------------------------
diff --git a/core/src/main/resources/elastic_settings.json 
b/core/src/main/resources/elastic_settings.json
new file mode 100644
index 0000000..f5faa3e
--- /dev/null
+++ b/core/src/main/resources/elastic_settings.json
@@ -0,0 +1,36 @@
+{
+  "index": {
+    "number_of_replicas": 0,
+    "refresh_interval": "-1",
+    "number_of_shards": "5",
+    "translog.flush_threshold_size": "1g",
+    "translog.sync_interval": "30s",
+    "warmer.enabled": "false"
+  },
+  "analysis": {
+    "filter": {
+      "cody_stop": {
+        "type": "stop",
+        "stopwords": "_english_"
+      },
+      "cody_stemmer": {
+        "type": "stemmer",
+        "language": "light_english"
+      }
+    },
+    "analyzer": {
+      "cody": {
+        "tokenizer": "standard",
+        "filter": [
+          "lowercase",
+          "cody_stop",
+          "cody_stemmer"
+        ]
+      },
+      "csv": {
+        "type": "pattern",
+        "pattern": ","
+      }
+    }
+  }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-sdap-mudrod/blob/7b76fa16/core/src/main/resources/javaSVMWithSGDModel/data/_SUCCESS
----------------------------------------------------------------------
diff --git a/core/src/main/resources/javaSVMWithSGDModel/data/_SUCCESS 
b/core/src/main/resources/javaSVMWithSGDModel/data/_SUCCESS
new file mode 100644
index 0000000..e69de29

http://git-wip-us.apache.org/repos/asf/incubator-sdap-mudrod/blob/7b76fa16/core/src/main/resources/javaSVMWithSGDModel/data/_common_metadata
----------------------------------------------------------------------
diff --git a/core/src/main/resources/javaSVMWithSGDModel/data/_common_metadata 
b/core/src/main/resources/javaSVMWithSGDModel/data/_common_metadata
new file mode 100644
index 0000000..cafbf1b
Binary files /dev/null and 
b/core/src/main/resources/javaSVMWithSGDModel/data/_common_metadata differ

http://git-wip-us.apache.org/repos/asf/incubator-sdap-mudrod/blob/7b76fa16/core/src/main/resources/javaSVMWithSGDModel/data/_metadata
----------------------------------------------------------------------
diff --git a/core/src/main/resources/javaSVMWithSGDModel/data/_metadata 
b/core/src/main/resources/javaSVMWithSGDModel/data/_metadata
new file mode 100644
index 0000000..6bfec98
Binary files /dev/null and 
b/core/src/main/resources/javaSVMWithSGDModel/data/_metadata differ

http://git-wip-us.apache.org/repos/asf/incubator-sdap-mudrod/blob/7b76fa16/core/src/main/resources/javaSVMWithSGDModel/data/part-r-00000-e008ae03-6b61-4931-ba29-27304de5a584.gz.parquet
----------------------------------------------------------------------
diff --git 
a/core/src/main/resources/javaSVMWithSGDModel/data/part-r-00000-e008ae03-6b61-4931-ba29-27304de5a584.gz.parquet
 
b/core/src/main/resources/javaSVMWithSGDModel/data/part-r-00000-e008ae03-6b61-4931-ba29-27304de5a584.gz.parquet
new file mode 100644
index 0000000..5033301
Binary files /dev/null and 
b/core/src/main/resources/javaSVMWithSGDModel/data/part-r-00000-e008ae03-6b61-4931-ba29-27304de5a584.gz.parquet
 differ

http://git-wip-us.apache.org/repos/asf/incubator-sdap-mudrod/blob/7b76fa16/core/src/main/resources/javaSVMWithSGDModel/metadata/_SUCCESS
----------------------------------------------------------------------
diff --git a/core/src/main/resources/javaSVMWithSGDModel/metadata/_SUCCESS 
b/core/src/main/resources/javaSVMWithSGDModel/metadata/_SUCCESS
new file mode 100644
index 0000000..e69de29

http://git-wip-us.apache.org/repos/asf/incubator-sdap-mudrod/blob/7b76fa16/core/src/main/resources/javaSVMWithSGDModel/metadata/part-00000
----------------------------------------------------------------------
diff --git a/core/src/main/resources/javaSVMWithSGDModel/metadata/part-00000 
b/core/src/main/resources/javaSVMWithSGDModel/metadata/part-00000
new file mode 100644
index 0000000..c972cbe
--- /dev/null
+++ b/core/src/main/resources/javaSVMWithSGDModel/metadata/part-00000
@@ -0,0 +1 @@
+{"class":"org.apache.spark.mllib.classification.SVMModel","version":"1.0","numFeatures":6,"numClasses":2}

http://git-wip-us.apache.org/repos/asf/incubator-sdap-mudrod/blob/7b76fa16/core/src/main/resources/log4j.properties
----------------------------------------------------------------------
diff --git a/core/src/main/resources/log4j.properties 
b/core/src/main/resources/log4j.properties
new file mode 100644
index 0000000..1e6e84d
--- /dev/null
+++ b/core/src/main/resources/log4j.properties
@@ -0,0 +1,63 @@
+# Licensed under the Apache License, Version 2.0 (the "License"); 
+# you may not use this file except in compliance with the License. 
+# You may obtain  a copy of the License at 
+#  
+# http://www.apache.org/licenses/LICENSE-2.0 Unless 
+#  
+# required by applicable law or agreed to in writing, software 
+# distributed under the License is distributed on an "AS IS" 
+# BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 
+# express or implied. See the License for the specific language 
+# governing permissions and limitations under the License. 
+# Define some default values that can be overridden by system properties
+# Logging Threshold
+mudrod.root.logger=INFO,DRFA, stdout
+mudrod.log.dir=.
+mudrod.log.file=mudrod.log
+log4j.threshhold=ALL
+# RootLogger - DailyRollingFileAppender
+log4j.rootLogger=${mudrod.root.logger}
+#special logging requirements for some commandline tools
+log4j.logger.MudrodEngine=INFO,cmdstdout
+#
+# Daily Rolling File Appender
+#
+log4j.appender.DRFA=org.apache.log4j.DailyRollingFileAppender
+log4j.appender.DRFA.File=${mudrod.log.dir}/${mudrod.log.file}
+# Rollver at midnight
+log4j.appender.DRFA.DatePattern=.yyyy-MM-dd
+# 30-day backup
+#log4j.appender.DRFA.MaxBackupIndex=30
+log4j.appender.DRFA.layout=org.apache.log4j.PatternLayout
+# Pattern format: Date LogLevel LoggerName LogMessage
+log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} - %m%n
+# Debugging Pattern format: Date LogLevel LoggerName 
(FileName:MethodName:LineNo) LogMessage
+#log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} 
(%F:%M(%L)) - %m%n
+#
+# stdout
+# Add *stdout* to rootlogger above if you want to use this 
+#
+log4j.appender.stdout=org.apache.log4j.ConsoleAppender
+log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
+log4j.appender.stdout.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} 
(%F:%M(%L)) - %m%n
+#
+# plain layout used for commandline tools to output to console
+#
+log4j.appender.cmdstdout=org.apache.log4j.ConsoleAppender
+log4j.appender.cmdstdout.layout=org.apache.log4j.PatternLayout
+log4j.appender.cmdstdout.layout.ConversionPattern=%m%n
+#
+# Rolling File Appender
+#
+#log4j.appender.RFA=org.apache.log4j.RollingFileAppender
+#log4j.appender.RFA.File=${hadoop.log.dir}/${hadoop.log.file}
+# Logfile size and and 30-day backups
+#log4j.appender.RFA.MaxFileSize=1MB
+#log4j.appender.RFA.MaxBackupIndex=30
+#log4j.appender.RFA.layout=org.apache.log4j.PatternLayout
+#log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} - %m%n
+#log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} 
(%F:%M(%L)) - %m%n
+# Custom Logging levels
+log4j.logger.akka=WARN
+log4j.logger.org.apache=WARN
+log4j.logger.gov.nasa.jpl.mudrod=INFO

http://git-wip-us.apache.org/repos/asf/incubator-sdap-mudrod/blob/7b76fa16/core/src/main/resources/log4j2.properties
----------------------------------------------------------------------
diff --git a/core/src/main/resources/log4j2.properties 
b/core/src/main/resources/log4j2.properties
new file mode 100644
index 0000000..1e6e84d
--- /dev/null
+++ b/core/src/main/resources/log4j2.properties
@@ -0,0 +1,63 @@
+# Licensed under the Apache License, Version 2.0 (the "License"); 
+# you may not use this file except in compliance with the License. 
+# You may obtain  a copy of the License at 
+#  
+# http://www.apache.org/licenses/LICENSE-2.0 Unless 
+#  
+# required by applicable law or agreed to in writing, software 
+# distributed under the License is distributed on an "AS IS" 
+# BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 
+# express or implied. See the License for the specific language 
+# governing permissions and limitations under the License. 
+# Define some default values that can be overridden by system properties
+# Logging Threshold
+mudrod.root.logger=INFO,DRFA, stdout
+mudrod.log.dir=.
+mudrod.log.file=mudrod.log
+log4j.threshhold=ALL
+# RootLogger - DailyRollingFileAppender
+log4j.rootLogger=${mudrod.root.logger}
+#special logging requirements for some commandline tools
+log4j.logger.MudrodEngine=INFO,cmdstdout
+#
+# Daily Rolling File Appender
+#
+log4j.appender.DRFA=org.apache.log4j.DailyRollingFileAppender
+log4j.appender.DRFA.File=${mudrod.log.dir}/${mudrod.log.file}
+# Rollver at midnight
+log4j.appender.DRFA.DatePattern=.yyyy-MM-dd
+# 30-day backup
+#log4j.appender.DRFA.MaxBackupIndex=30
+log4j.appender.DRFA.layout=org.apache.log4j.PatternLayout
+# Pattern format: Date LogLevel LoggerName LogMessage
+log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} - %m%n
+# Debugging Pattern format: Date LogLevel LoggerName 
(FileName:MethodName:LineNo) LogMessage
+#log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} 
(%F:%M(%L)) - %m%n
+#
+# stdout
+# Add *stdout* to rootlogger above if you want to use this 
+#
+log4j.appender.stdout=org.apache.log4j.ConsoleAppender
+log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
+log4j.appender.stdout.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} 
(%F:%M(%L)) - %m%n
+#
+# plain layout used for commandline tools to output to console
+#
+log4j.appender.cmdstdout=org.apache.log4j.ConsoleAppender
+log4j.appender.cmdstdout.layout=org.apache.log4j.PatternLayout
+log4j.appender.cmdstdout.layout.ConversionPattern=%m%n
+#
+# Rolling File Appender
+#
+#log4j.appender.RFA=org.apache.log4j.RollingFileAppender
+#log4j.appender.RFA.File=${hadoop.log.dir}/${hadoop.log.file}
+# Logfile size and and 30-day backups
+#log4j.appender.RFA.MaxFileSize=1MB
+#log4j.appender.RFA.MaxBackupIndex=30
+#log4j.appender.RFA.layout=org.apache.log4j.PatternLayout
+#log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} - %m%n
+#log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} 
(%F:%M(%L)) - %m%n
+# Custom Logging levels
+log4j.logger.akka=WARN
+log4j.logger.org.apache=WARN
+log4j.logger.gov.nasa.jpl.mudrod=INFO

[14/21] incubator-sdap-mudrod git commit: SDAP-1 Import all code under the SDAP SGA

Reply via email to