This is an automated email from the ASF dual-hosted git repository.

lewismc pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-sdap-mudrod.git


The following commit(s) were added to refs/heads/master by this push:
     new c2dce18  SDAP-55 (#15)
c2dce18 is described below

commit c2dce1846ef1742601cb89db9271e03857a9978f
Author: quintinali <[email protected]>
AuthorDate: Mon May 14 04:37:05 2018 +0800

    SDAP-55 (#15)
    
    * clean weblog module
    
    (1) remove sequential log processing code
    (2) fix bugs of clickstream extraction
    
    * clean code
    
    remove useless blank line
    
    * try to solve conflicts
---
 .../apache/sdap/mudrod/main/MudrodConstants.java   |   2 -
 .../recommendation/pre/SessionCooccurence.java     |   2 +-
 .../mudrod/weblog/pre/ClickStreamGenerator.java    |   4 +-
 .../sdap/mudrod/weblog/pre/CrawlerDetection.java   |  36 +---
 .../sdap/mudrod/weblog/pre/ImportLogFile.java      | 195 +--------------------
 .../weblog/pre/RankingTrainDataGenerator.java      |   4 +-
 .../sdap/mudrod/weblog/pre/SessionGenerator.java   |  16 +-
 .../sdap/mudrod/weblog/pre/SessionStatistic.java   |  22 +--
 .../structure/{ => log}/ApacheAccessLog.java       |   2 +-
 .../weblog/structure/{ => log}/Coordinates.java    |   2 +-
 .../mudrod/weblog/structure/{ => log}/FtpLog.java  |   2 +-
 .../mudrod/weblog/structure/{ => log}/GeoIp.java   |   2 +-
 .../weblog/structure/{ => log}/RequestUrl.java     |   2 +-
 .../mudrod/weblog/structure/{ => log}/WebLog.java  |   2 +-
 .../structure/{ => session}/ClickStream.java       |   2 +-
 .../structure/{ => session}/RankingTrainData.java  |   2 +-
 .../weblog/structure/{ => session}/Session.java    |   8 +-
 .../structure/{ => session}/SessionExtractor.java  |  44 +----
 .../structure/{ => session}/SessionNode.java       |  22 +--
 .../structure/{ => session}/SessionTree.java       |  24 +--
 core/src/main/resources/config.properties          |   2 +-
 .../services/search/SessionDetailResource.java     |   2 +-
 22 files changed, 61 insertions(+), 338 deletions(-)

diff --git 
a/core/src/main/java/org/apache/sdap/mudrod/main/MudrodConstants.java 
b/core/src/main/java/org/apache/sdap/mudrod/main/MudrodConstants.java
index 0c8bcc2..84ba347 100644
--- a/core/src/main/java/org/apache/sdap/mudrod/main/MudrodConstants.java
+++ b/core/src/main/java/org/apache/sdap/mudrod/main/MudrodConstants.java
@@ -98,8 +98,6 @@ public interface MudrodConstants {
   
   public static final String ONTOLOGY_INPUT_PATH = 
"mudrod.ontology.input.path";
 
-  public static final String PROCESS_TYPE = "mudrod.processing.type";
-
   /** Defined on CLI */
   public static final String METADATA_DOWNLOAD = "mudrod.metadata.download";
   
diff --git 
a/core/src/main/java/org/apache/sdap/mudrod/recommendation/pre/SessionCooccurence.java
 
b/core/src/main/java/org/apache/sdap/mudrod/recommendation/pre/SessionCooccurence.java
index a4a79e0..4a5d4bb 100644
--- 
a/core/src/main/java/org/apache/sdap/mudrod/recommendation/pre/SessionCooccurence.java
+++ 
b/core/src/main/java/org/apache/sdap/mudrod/recommendation/pre/SessionCooccurence.java
@@ -19,7 +19,7 @@ import org.apache.sdap.mudrod.driver.SparkDriver;
 import org.apache.sdap.mudrod.main.MudrodConstants;
 import org.apache.sdap.mudrod.utils.LabeledRowMatrix;
 import org.apache.sdap.mudrod.utils.MatrixUtil;
-import org.apache.sdap.mudrod.weblog.structure.SessionExtractor;
+import org.apache.sdap.mudrod.weblog.structure.session.SessionExtractor;
 import org.apache.spark.api.java.JavaPairRDD;
 import org.apache.spark.api.java.function.PairFunction;
 import org.elasticsearch.action.search.SearchResponse;
diff --git 
a/core/src/main/java/org/apache/sdap/mudrod/weblog/pre/ClickStreamGenerator.java
 
b/core/src/main/java/org/apache/sdap/mudrod/weblog/pre/ClickStreamGenerator.java
index 886cd4a..2c38d29 100644
--- 
a/core/src/main/java/org/apache/sdap/mudrod/weblog/pre/ClickStreamGenerator.java
+++ 
b/core/src/main/java/org/apache/sdap/mudrod/weblog/pre/ClickStreamGenerator.java
@@ -19,8 +19,8 @@ import org.apache.sdap.mudrod.driver.SparkDriver;
 import org.apache.sdap.mudrod.main.MudrodConstants;
 import org.apache.sdap.mudrod.utils.LabeledRowMatrix;
 import org.apache.sdap.mudrod.utils.MatrixUtil;
-import org.apache.sdap.mudrod.weblog.structure.ClickStream;
-import org.apache.sdap.mudrod.weblog.structure.SessionExtractor;
+import org.apache.sdap.mudrod.weblog.structure.session.ClickStream;
+import org.apache.sdap.mudrod.weblog.structure.session.SessionExtractor;
 import org.apache.spark.api.java.JavaPairRDD;
 import org.apache.spark.api.java.JavaRDD;
 import org.slf4j.Logger;
diff --git 
a/core/src/main/java/org/apache/sdap/mudrod/weblog/pre/CrawlerDetection.java 
b/core/src/main/java/org/apache/sdap/mudrod/weblog/pre/CrawlerDetection.java
index 3e782a7..704ccfd 100644
--- a/core/src/main/java/org/apache/sdap/mudrod/weblog/pre/CrawlerDetection.java
+++ b/core/src/main/java/org/apache/sdap/mudrod/weblog/pre/CrawlerDetection.java
@@ -79,7 +79,7 @@ public class CrawlerDetection extends LogAbstract {
     LOG.info("Starting Crawler detection {}.", httpType);
     startTime = System.currentTimeMillis();
     try {
-      checkByRate();
+      checkByRateInParallel();
     } catch (InterruptedException | IOException e) {
       LOG.error("Encountered an error whilst detecting Web crawlers.", e);
     }
@@ -103,40 +103,6 @@ public class CrawlerDetection extends LogAbstract {
     return false;
   }
 
-  public void checkByRate() throws InterruptedException, IOException {
-    String processingType = props.getProperty(MudrodConstants.PROCESS_TYPE);
-    if (processingType.equals("sequential")) {
-      checkByRateInSequential();
-    } else if (processingType.equals("parallel")) {
-      checkByRateInParallel();
-    }
-  }
-
-  /**
-   * Check crawler by request sending rate, which is read from configruation
-   * file
-   *
-   * @throws InterruptedException InterruptedException
-   * @throws IOException          IOException
-   */
-  public void checkByRateInSequential() throws InterruptedException, 
IOException {
-    es.createBulkProcessor();
-
-    int rate = 
Integer.parseInt(props.getProperty(MudrodConstants.REQUEST_RATE));
-
-    Terms users = this.getUserTerms(this.httpType);
-    LOG.info("Original User count: {}", 
Integer.toString(users.getBuckets().size()));
-
-    int userCount = 0;
-    for (Terms.Bucket entry : users.getBuckets()) {
-      String user = entry.getKey().toString();
-      int count = checkByRate(es, user);
-      userCount += count;
-    }
-    es.destroyBulkProcessor();
-    LOG.info("User count: {}", Integer.toString(userCount));
-  }
-
   void checkByRateInParallel() throws InterruptedException, IOException {
 
     JavaRDD<String> userRDD = getUserRDD(this.httpType);
diff --git 
a/core/src/main/java/org/apache/sdap/mudrod/weblog/pre/ImportLogFile.java 
b/core/src/main/java/org/apache/sdap/mudrod/weblog/pre/ImportLogFile.java
index c7622d6..6e5f473 100644
--- a/core/src/main/java/org/apache/sdap/mudrod/weblog/pre/ImportLogFile.java
+++ b/core/src/main/java/org/apache/sdap/mudrod/weblog/pre/ImportLogFile.java
@@ -16,8 +16,8 @@ package org.apache.sdap.mudrod.weblog.pre;
 import org.apache.sdap.mudrod.driver.ESDriver;
 import org.apache.sdap.mudrod.driver.SparkDriver;
 import org.apache.sdap.mudrod.main.MudrodConstants;
-import org.apache.sdap.mudrod.weblog.structure.ApacheAccessLog;
-import org.apache.sdap.mudrod.weblog.structure.FtpLog;
+import org.apache.sdap.mudrod.weblog.structure.log.ApacheAccessLog;
+import org.apache.sdap.mudrod.weblog.structure.log.FtpLog;
 import org.apache.spark.api.java.JavaRDD;
 import org.elasticsearch.action.index.IndexRequest;
 import org.elasticsearch.spark.rdd.api.java.JavaEsSpark;
@@ -55,6 +55,11 @@ public class ImportLogFile extends LogAbstract {
   public static final int NUM_FIELDS = 9;
   Pattern p = Pattern.compile(logEntryPattern);
   transient Matcher matcher;
+  
+  @Override
+  public Object execute(Object o) {
+    return null;
+  }
 
   /**
    * Constructor supporting a number of parameters documented below.
@@ -146,31 +151,7 @@ public class ImportLogFile extends LogAbstract {
       return;
     }
 
-    String processingType = props.getProperty(MudrodConstants.PROCESS_TYPE, 
"parallel");
-    if (processingType.equals("sequential")) {
-      readFileInSequential(httplogpath, ftplogpath);
-    } else if (processingType.equals("parallel")) {
-      readFileInParallel(httplogpath, ftplogpath);
-    }
-  }
-
-  /**
-   * Read the FTP or HTTP log path with the intention of processing lines from
-   * log files.
-   *
-   * @param httplogpath path to the parent directory containing http logs
-   * @param ftplogpath  path to the parent directory containing ftp logs
-   */
-  public void readFileInSequential(String httplogpath, String ftplogpath) {
-    es.createBulkProcessor();
-    try {
-      readLogFile(httplogpath, "http", logIndex, httpType);
-      readLogFile(ftplogpath, "FTP", logIndex, ftpType);
-
-    } catch (IOException e) {
-      LOG.error("Error whilst reading log file.", e);
-    }
-    es.destroyBulkProcessor();
+    readFileInParallel(httplogpath, ftplogpath);
   }
 
   /**
@@ -181,7 +162,6 @@ public class ImportLogFile extends LogAbstract {
    * @param ftplogpath  path to the parent directory containing ftp logs
    */
   public void readFileInParallel(String httplogpath, String ftplogpath) {
-
     importHttpfile(httplogpath);
     importFtpfile(ftplogpath);
   }
@@ -189,171 +169,12 @@ public class ImportLogFile extends LogAbstract {
   public void importHttpfile(String httplogpath) {
     // import http logs
     JavaRDD<String> accessLogs = spark.sc.textFile(httplogpath, 
this.partition).map(s -> ApacheAccessLog.parseFromLogLine(s, 
props)).filter(ApacheAccessLog::checknull);
-
     JavaEsSpark.saveJsonToEs(accessLogs, logIndex + "/" + this.httpType);
   }
 
   public void importFtpfile(String ftplogpath) {
     // import ftp logs
     JavaRDD<String> ftpLogs = spark.sc.textFile(ftplogpath, 
this.partition).map(s -> FtpLog.parseFromLogLine(s, 
props)).filter(FtpLog::checknull);
-
     JavaEsSpark.saveJsonToEs(ftpLogs, logIndex + "/" + this.ftpType);
   }
-
-  /**
-   * Process a log path on local file system which contains the relevant
-   * parameters as below.
-   *
-   * @param fileName the {@link java.lang.String} path to the log directory on 
file
-   *                 system
-   * @param protocol whether to process 'http' or 'FTP'
-   * @param index    the index name to write logs to
-   * @param type     one of the available protocols from which Mudrod logs are 
obtained.
-   * @throws IOException if there is an error reading anything from the 
fileName provided.
-   */
-  public void readLogFile(String fileName, String protocol, String index, 
String type) throws IOException {
-    BufferedReader br = new BufferedReader(new FileReader(fileName));
-    int count = 0;
-    try {
-      String line = br.readLine();
-      while (line != null) {
-        if ("FTP".equals(protocol)) {
-          parseSingleLineFTP(line, index, type);
-        } else {
-          parseSingleLineHTTP(line, index, type);
-        }
-        line = br.readLine();
-        count++;
-      }
-    } catch (FileNotFoundException e) {
-      LOG.error("File not found.", e);
-    } catch (IOException e) {
-      LOG.error("Error reading input directory.", e);
-    } finally {
-      br.close();
-      LOG.info("Num of {} entries:\t{}", protocol, count);
-    }
-  }
-
-  /**
-   * Parse a single FTP log entry
-   *
-   * @param log   a single log line
-   * @param index the index name we wish to persist the log line to
-   * @param type  one of the available protocols from which Mudrod logs are 
obtained.
-   */
-  public void parseSingleLineFTP(String log, String index, String type) {
-    String ip = log.split(" +")[6];
-
-    String time = log.split(" +")[1] + ":" + log.split(" +")[2] + ":" + 
log.split(" +")[3] + ":" + log.split(" +")[4];
-
-    time = switchtoNum(time);
-    SimpleDateFormat formatter = new SimpleDateFormat("MM:dd:HH:mm:ss:yyyy");
-    Date date = null;
-    try {
-      date = formatter.parse(time);
-    } catch (ParseException e) {
-      LOG.error("Error whilst parsing the date.", e);
-    }
-    String bytes = log.split(" +")[7];
-
-    String request = log.split(" +")[8].toLowerCase();
-
-    if (!request.contains("/misc/") && !request.contains("readme")) {
-      IndexRequest ir;
-      try {
-        ir = new IndexRequest(index, type)
-            .source(jsonBuilder()
-                    .startObject()
-                    .field("LogType", MudrodConstants.FTP_LOG)
-                    .field("IP", ip)
-                    .field("Time", date)
-                    .field("Request", request)
-                    .field("Bytes", Long.parseLong(bytes))
-                    .endObject());
-        es.getBulkProcessor().add(ir);
-      } catch (NumberFormatException e) {
-        LOG.error("Error whilst processing numbers", e);
-      } catch (IOException e) {
-        LOG.error("IOError whilst adding to the bulk processor.", e);
-      }
-    }
-
-  }
-
-  /**
-   * Parse a single HTTP log entry
-   *
-   * @param log   a single log line
-   * @param index the index name we wish to persist the log line to
-   * @param type  one of the available protocols from which Mudrod logs are 
obtained.
-   */
-  public void parseSingleLineHTTP(String log, String index, String type) {
-    matcher = p.matcher(log);
-    if (!matcher.matches() || NUM_FIELDS != matcher.groupCount()) {
-      return;
-    }
-    String time = matcher.group(4);
-    time = switchtoNum(time);
-    SimpleDateFormat formatter = new SimpleDateFormat("dd/MM/yyyy:HH:mm:ss");
-    Date date = null;
-    try {
-      date = formatter.parse(time);
-    } catch (ParseException e) {
-      LOG.error("Error whilst attempting to parse date.", e);
-    }
-
-    String bytes = matcher.group(7);
-    if ("-".equals(bytes)) {
-      bytes = "0";
-    }
-
-    String request = matcher.group(5).toLowerCase();
-    String agent = matcher.group(9);
-    CrawlerDetection crawlerDe = new CrawlerDetection(this.props, this.es, 
this.spark);
-    if (!crawlerDe.checkKnownCrawler(agent)) {
-      boolean tag = false;
-      String[] mimeTypes = 
props.getProperty(MudrodConstants.BLACK_LIST_REQUEST).split(",");
-      for(String str:mimeTypes) {
-        if (request.contains(str.trim())) {
-          tag = true;
-          break;
-        }
-      }
-
-      if (!tag) {
-        IndexRequest ir = null;
-        executeBulkRequest(ir, index, type, matcher, date, bytes);
-      }
-    }
-  }
-
-  private void executeBulkRequest(IndexRequest ir, String index, String type, 
Matcher matcher, Date date, String bytes) {
-    IndexRequest newIr = ir;
-    try {
-      newIr = new IndexRequest(index, type)
-              .source(jsonBuilder()
-                      .startObject()
-                      .field("LogType", MudrodConstants.HTTP_LOG)
-                      .field("IP", matcher.group(1))
-                      .field("Time", date)
-                      .field("Request", matcher.group(5))
-                      .field("Response", matcher.group(6))
-                      .field("Bytes", Integer.parseInt(bytes))
-                      .field("Referer", matcher.group(8))
-                      .field("Browser", matcher.group(9))
-                      .endObject());
-
-      es.getBulkProcessor().add(newIr);
-    } catch (NumberFormatException e) {
-      LOG.error("Error whilst processing numbers", e);
-    } catch (IOException e) {
-      LOG.error("IOError whilst adding to the bulk processor.", e);
-    }
-  }
-
-  @Override
-  public Object execute(Object o) {
-    return null;
-  }
 }
diff --git 
a/core/src/main/java/org/apache/sdap/mudrod/weblog/pre/RankingTrainDataGenerator.java
 
b/core/src/main/java/org/apache/sdap/mudrod/weblog/pre/RankingTrainDataGenerator.java
index 766e853..de41d56 100644
--- 
a/core/src/main/java/org/apache/sdap/mudrod/weblog/pre/RankingTrainDataGenerator.java
+++ 
b/core/src/main/java/org/apache/sdap/mudrod/weblog/pre/RankingTrainDataGenerator.java
@@ -3,8 +3,8 @@ package org.apache.sdap.mudrod.weblog.pre;
 import org.apache.sdap.mudrod.discoveryengine.DiscoveryStepAbstract;
 import org.apache.sdap.mudrod.driver.ESDriver;
 import org.apache.sdap.mudrod.driver.SparkDriver;
-import org.apache.sdap.mudrod.weblog.structure.RankingTrainData;
-import org.apache.sdap.mudrod.weblog.structure.SessionExtractor;
+import org.apache.sdap.mudrod.weblog.structure.session.RankingTrainData;
+import org.apache.sdap.mudrod.weblog.structure.session.SessionExtractor;
 import org.apache.spark.api.java.JavaRDD;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
diff --git 
a/core/src/main/java/org/apache/sdap/mudrod/weblog/pre/SessionGenerator.java 
b/core/src/main/java/org/apache/sdap/mudrod/weblog/pre/SessionGenerator.java
index 4e170d7..4ce1535 100644
--- a/core/src/main/java/org/apache/sdap/mudrod/weblog/pre/SessionGenerator.java
+++ b/core/src/main/java/org/apache/sdap/mudrod/weblog/pre/SessionGenerator.java
@@ -16,7 +16,7 @@ package org.apache.sdap.mudrod.weblog.pre;
 import org.apache.sdap.mudrod.driver.ESDriver;
 import org.apache.sdap.mudrod.driver.SparkDriver;
 import org.apache.sdap.mudrod.main.MudrodConstants;
-import org.apache.sdap.mudrod.weblog.structure.Session;
+import org.apache.sdap.mudrod.weblog.structure.session.Session;
 import org.apache.spark.api.java.JavaRDD;
 import org.apache.spark.api.java.function.FlatMapFunction;
 import org.apache.spark.api.java.function.Function2;
@@ -93,21 +93,15 @@ public class SessionGenerator extends LogAbstract {
   }
 
   public void genSessionByReferer(int timeThres) throws InterruptedException, 
IOException {
-    String processingType = props.getProperty(MudrodConstants.PROCESS_TYPE);
-    if (processingType.equals("sequential")) {
-      genSessionByRefererInSequential(timeThres);
-    } else if (processingType.equals("parallel")) {
+    
       genSessionByRefererInParallel(timeThres);
-    }
+  
   }
 
   public void combineShortSessions(int timeThres) throws InterruptedException, 
IOException {
-    String processingType = props.getProperty(MudrodConstants.PROCESS_TYPE);
-    if (processingType.equals("sequential")) {
-      combineShortSessionsInSequential(timeThres);
-    } else if (processingType.equals("parallel")) {
+   
       combineShortSessionsInParallel(timeThres);
-    }
+    
   }
 
   /**
diff --git 
a/core/src/main/java/org/apache/sdap/mudrod/weblog/pre/SessionStatistic.java 
b/core/src/main/java/org/apache/sdap/mudrod/weblog/pre/SessionStatistic.java
index 3cee9c7..981bece 100644
--- a/core/src/main/java/org/apache/sdap/mudrod/weblog/pre/SessionStatistic.java
+++ b/core/src/main/java/org/apache/sdap/mudrod/weblog/pre/SessionStatistic.java
@@ -16,7 +16,7 @@ package org.apache.sdap.mudrod.weblog.pre;
 import org.apache.sdap.mudrod.driver.ESDriver;
 import org.apache.sdap.mudrod.driver.SparkDriver;
 import org.apache.sdap.mudrod.main.MudrodConstants;
-import org.apache.sdap.mudrod.weblog.structure.RequestUrl;
+import org.apache.sdap.mudrod.weblog.structure.log.RequestUrl;
 import org.apache.spark.api.java.JavaRDD;
 import org.apache.spark.api.java.function.FlatMapFunction;
 import org.apache.spark.api.java.function.Function2;
@@ -81,27 +81,7 @@ public class SessionStatistic extends LogAbstract {
   }
 
   public void processSession() throws InterruptedException, IOException, 
ExecutionException {
-    String processingType = props.getProperty(MudrodConstants.PROCESS_TYPE);
-    if ("sequential".equals(processingType)) {
-      processSessionInSequential();
-    } else if ("parallel".equals(processingType)) {
       processSessionInParallel();
-    }
-  }
-
-  public void processSessionInSequential() throws IOException, 
InterruptedException, ExecutionException {
-    es.createBulkProcessor();
-    Terms sessions = this.getSessionTerms();
-    int sessionCount = 0;
-    for (Terms.Bucket entry : sessions.getBuckets()) {
-      if (entry.getDocCount() >= 3 && !"invalid".equals(entry.getKey())) {
-        String sessionid = entry.getKey().toString();
-        int sessionNum = processSession(es, sessionid);
-        sessionCount += sessionNum;
-      }
-    }
-    LOG.info("Final Session count: {}", Integer.toString(sessionCount));
-    es.destroyBulkProcessor();
   }
 
   /**
diff --git 
a/core/src/main/java/org/apache/sdap/mudrod/weblog/structure/ApacheAccessLog.java
 
b/core/src/main/java/org/apache/sdap/mudrod/weblog/structure/log/ApacheAccessLog.java
similarity index 98%
rename from 
core/src/main/java/org/apache/sdap/mudrod/weblog/structure/ApacheAccessLog.java
rename to 
core/src/main/java/org/apache/sdap/mudrod/weblog/structure/log/ApacheAccessLog.java
index 050d19d..8224b33 100644
--- 
a/core/src/main/java/org/apache/sdap/mudrod/weblog/structure/ApacheAccessLog.java
+++ 
b/core/src/main/java/org/apache/sdap/mudrod/weblog/structure/log/ApacheAccessLog.java
@@ -11,7 +11,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.sdap.mudrod.weblog.structure;
+package org.apache.sdap.mudrod.weblog.structure.log;
 
 import com.google.gson.Gson;
 
diff --git 
a/core/src/main/java/org/apache/sdap/mudrod/weblog/structure/Coordinates.java 
b/core/src/main/java/org/apache/sdap/mudrod/weblog/structure/log/Coordinates.java
similarity index 92%
rename from 
core/src/main/java/org/apache/sdap/mudrod/weblog/structure/Coordinates.java
rename to 
core/src/main/java/org/apache/sdap/mudrod/weblog/structure/log/Coordinates.java
index 5e6fd07..43a7642 100644
--- 
a/core/src/main/java/org/apache/sdap/mudrod/weblog/structure/Coordinates.java
+++ 
b/core/src/main/java/org/apache/sdap/mudrod/weblog/structure/log/Coordinates.java
@@ -11,7 +11,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.sdap.mudrod.weblog.structure;
+package org.apache.sdap.mudrod.weblog.structure.log;
 
 public class Coordinates {
   /*
diff --git 
a/core/src/main/java/org/apache/sdap/mudrod/weblog/structure/FtpLog.java 
b/core/src/main/java/org/apache/sdap/mudrod/weblog/structure/log/FtpLog.java
similarity index 97%
rename from 
core/src/main/java/org/apache/sdap/mudrod/weblog/structure/FtpLog.java
rename to 
core/src/main/java/org/apache/sdap/mudrod/weblog/structure/log/FtpLog.java
index 9f39655..91fa228 100644
--- a/core/src/main/java/org/apache/sdap/mudrod/weblog/structure/FtpLog.java
+++ b/core/src/main/java/org/apache/sdap/mudrod/weblog/structure/log/FtpLog.java
@@ -11,7 +11,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.sdap.mudrod.weblog.structure;
+package org.apache.sdap.mudrod.weblog.structure.log;
 
 import com.google.gson.Gson;
 
diff --git 
a/core/src/main/java/org/apache/sdap/mudrod/weblog/structure/GeoIp.java 
b/core/src/main/java/org/apache/sdap/mudrod/weblog/structure/log/GeoIp.java
similarity index 96%
rename from 
core/src/main/java/org/apache/sdap/mudrod/weblog/structure/GeoIp.java
rename to 
core/src/main/java/org/apache/sdap/mudrod/weblog/structure/log/GeoIp.java
index d3e94dc..154cd98 100644
--- a/core/src/main/java/org/apache/sdap/mudrod/weblog/structure/GeoIp.java
+++ b/core/src/main/java/org/apache/sdap/mudrod/weblog/structure/log/GeoIp.java
@@ -11,7 +11,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.sdap.mudrod.weblog.structure;
+package org.apache.sdap.mudrod.weblog.structure.log;
 
 import org.apache.sdap.mudrod.utils.HttpRequest;
 
diff --git 
a/core/src/main/java/org/apache/sdap/mudrod/weblog/structure/RequestUrl.java 
b/core/src/main/java/org/apache/sdap/mudrod/weblog/structure/log/RequestUrl.java
similarity index 99%
rename from 
core/src/main/java/org/apache/sdap/mudrod/weblog/structure/RequestUrl.java
rename to 
core/src/main/java/org/apache/sdap/mudrod/weblog/structure/log/RequestUrl.java
index 05a3395..0c365a4 100644
--- a/core/src/main/java/org/apache/sdap/mudrod/weblog/structure/RequestUrl.java
+++ 
b/core/src/main/java/org/apache/sdap/mudrod/weblog/structure/log/RequestUrl.java
@@ -11,7 +11,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.sdap.mudrod.weblog.structure;
+package org.apache.sdap.mudrod.weblog.structure.log;
 
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
diff --git 
a/core/src/main/java/org/apache/sdap/mudrod/weblog/structure/WebLog.java 
b/core/src/main/java/org/apache/sdap/mudrod/weblog/structure/log/WebLog.java
similarity index 97%
rename from 
core/src/main/java/org/apache/sdap/mudrod/weblog/structure/WebLog.java
rename to 
core/src/main/java/org/apache/sdap/mudrod/weblog/structure/log/WebLog.java
index 93a9747..9046992 100644
--- a/core/src/main/java/org/apache/sdap/mudrod/weblog/structure/WebLog.java
+++ b/core/src/main/java/org/apache/sdap/mudrod/weblog/structure/log/WebLog.java
@@ -11,7 +11,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.sdap.mudrod.weblog.structure;
+package org.apache.sdap.mudrod.weblog.structure.log;
 
 import java.io.Serializable;
 
diff --git 
a/core/src/main/java/org/apache/sdap/mudrod/weblog/structure/ClickStream.java 
b/core/src/main/java/org/apache/sdap/mudrod/weblog/structure/session/ClickStream.java
similarity index 98%
rename from 
core/src/main/java/org/apache/sdap/mudrod/weblog/structure/ClickStream.java
rename to 
core/src/main/java/org/apache/sdap/mudrod/weblog/structure/session/ClickStream.java
index 2f0c34d..7e5069e 100644
--- 
a/core/src/main/java/org/apache/sdap/mudrod/weblog/structure/ClickStream.java
+++ 
b/core/src/main/java/org/apache/sdap/mudrod/weblog/structure/session/ClickStream.java
@@ -11,7 +11,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.sdap.mudrod.weblog.structure;
+package org.apache.sdap.mudrod.weblog.structure.session;
 
 import org.codehaus.jettison.json.JSONException;
 import org.codehaus.jettison.json.JSONObject;
diff --git 
a/core/src/main/java/org/apache/sdap/mudrod/weblog/structure/RankingTrainData.java
 
b/core/src/main/java/org/apache/sdap/mudrod/weblog/structure/session/RankingTrainData.java
similarity index 98%
rename from 
core/src/main/java/org/apache/sdap/mudrod/weblog/structure/RankingTrainData.java
rename to 
core/src/main/java/org/apache/sdap/mudrod/weblog/structure/session/RankingTrainData.java
index cf4ec23..bdf477a 100644
--- 
a/core/src/main/java/org/apache/sdap/mudrod/weblog/structure/RankingTrainData.java
+++ 
b/core/src/main/java/org/apache/sdap/mudrod/weblog/structure/session/RankingTrainData.java
@@ -1,4 +1,4 @@
-package org.apache.sdap.mudrod.weblog.structure;
+package org.apache.sdap.mudrod.weblog.structure.session;
 
 import java.io.Serializable;
 import java.util.Map;
diff --git 
a/core/src/main/java/org/apache/sdap/mudrod/weblog/structure/Session.java 
b/core/src/main/java/org/apache/sdap/mudrod/weblog/structure/session/Session.java
similarity index 97%
rename from 
core/src/main/java/org/apache/sdap/mudrod/weblog/structure/Session.java
rename to 
core/src/main/java/org/apache/sdap/mudrod/weblog/structure/session/Session.java
index 31bef0c..2c917a6 100644
--- a/core/src/main/java/org/apache/sdap/mudrod/weblog/structure/Session.java
+++ 
b/core/src/main/java/org/apache/sdap/mudrod/weblog/structure/session/Session.java
@@ -11,7 +11,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.sdap.mudrod.weblog.structure;
+package org.apache.sdap.mudrod.weblog.structure.session;
 
 import com.google.gson.Gson;
 import com.google.gson.JsonElement;
@@ -188,8 +188,10 @@ public class Session /*extends MudrodAbstract*/ implements 
Comparable<Session> {
     } catch (UnsupportedEncodingException e) {
       LOG.error("Erro whilst obtaining the Session Tree: {}", e);
     }
+    
+    //tree.printTree(tree.root);
 
-    List<ClickStream> clickthroughs = tree.getClickStreamList();
+    List<ClickStream> clickthroughs = tree.getClickStreamList(props);
     return clickthroughs;
   }
 
@@ -215,7 +217,7 @@ public class Session /*extends MudrodAbstract*/ implements 
Comparable<Session> {
       String logType = (String) result.get("LogType");
       String referer = (String) result.get("Referer");
 
-      SessionNode node = new SessionNode(request, logType, referer, 
props.getProperty(MudrodConstants.BASE_URL), time, seq);
+      SessionNode node = new SessionNode(props, request, logType, referer, 
time, seq);
       tree.insert(node);
       seq++;
     }
diff --git 
a/core/src/main/java/org/apache/sdap/mudrod/weblog/structure/SessionExtractor.java
 
b/core/src/main/java/org/apache/sdap/mudrod/weblog/structure/session/SessionExtractor.java
similarity index 90%
rename from 
core/src/main/java/org/apache/sdap/mudrod/weblog/structure/SessionExtractor.java
rename to 
core/src/main/java/org/apache/sdap/mudrod/weblog/structure/session/SessionExtractor.java
index 85b6961..f7eb602 100644
--- 
a/core/src/main/java/org/apache/sdap/mudrod/weblog/structure/SessionExtractor.java
+++ 
b/core/src/main/java/org/apache/sdap/mudrod/weblog/structure/session/SessionExtractor.java
@@ -11,7 +11,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.sdap.mudrod.weblog.structure;
+package org.apache.sdap.mudrod.weblog.structure.session;
 
 import org.apache.sdap.mudrod.driver.ESDriver;
 import org.apache.sdap.mudrod.driver.SparkDriver;
@@ -70,47 +70,7 @@ public class SessionExtractor implements Serializable {
    * @return clickstream list in JavaRDD format {@link ClickStream}
    */
   public JavaRDD<ClickStream> extractClickStreamFromES(Properties props, 
ESDriver es, SparkDriver spark) {
-    switch (props.getProperty(MudrodConstants.PROCESS_TYPE)) {
-      case "sequential":
-        List<ClickStream> queryList = this.getClickStreamList(props, es);
-        return spark.sc.parallelize(queryList);
-      case "parallel":
-        return getClickStreamListInParallel(props, spark, es);
-      default:
-      LOG.error("Error finding processing type for '{}'. Please check your 
config.xml.", props.getProperty(MudrodConstants.PROCESS_TYPE));
-    }
-    return null;
-  }
-
-  /**
-   * getClickStreamList:Extract click streams from logs stored in 
Elasticsearch.
-   *
-   * @param props
-   *          the Mudrod configuration
-   * @param es
-   *          the Elasticsearch driver
-   * @return clickstream list {@link ClickStream}
-   */
-  protected List<ClickStream> getClickStreamList(Properties props, ESDriver 
es) {
-    List<String> logIndexList = 
es.getIndexListWithPrefix(props.getProperty(MudrodConstants.LOG_INDEX));
-
-    List<ClickStream> result = new ArrayList<>();
-    for (String logIndex : logIndexList) {
-      List<String> sessionIdList;
-      try {
-        sessionIdList = this.getSessions(props, es, logIndex);
-        Session session = new Session(props, es);
-        for (String aSessionIdList : sessionIdList) {
-          String[] sArr = aSessionIdList.split(",");
-          List<ClickStream> datas = session.getClickStreamList(sArr[1], 
sArr[2], sArr[0]);
-          result.addAll(datas);
-        }
-      } catch (Exception e) {
-        LOG.error("Error during extraction of Clickstreams from log index. 
{}", e);
-      }
-    }
-
-    return result;
+       return getClickStreamListInParallel(props, spark, es);
   }
 
   protected JavaRDD<ClickStream> getClickStreamListInParallel(Properties 
props, SparkDriver spark, ESDriver es) {
diff --git 
a/core/src/main/java/org/apache/sdap/mudrod/weblog/structure/SessionNode.java 
b/core/src/main/java/org/apache/sdap/mudrod/weblog/structure/session/SessionNode.java
similarity index 91%
rename from 
core/src/main/java/org/apache/sdap/mudrod/weblog/structure/SessionNode.java
rename to 
core/src/main/java/org/apache/sdap/mudrod/weblog/structure/session/SessionNode.java
index 6378615..91a29e1 100644
--- 
a/core/src/main/java/org/apache/sdap/mudrod/weblog/structure/SessionNode.java
+++ 
b/core/src/main/java/org/apache/sdap/mudrod/weblog/structure/session/SessionNode.java
@@ -11,7 +11,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.sdap.mudrod.weblog.structure;
+package org.apache.sdap.mudrod.weblog.structure.session;
 
 import java.io.UnsupportedEncodingException;
 import java.net.URLDecoder;
@@ -19,6 +19,8 @@ import java.util.*;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
+import org.apache.sdap.mudrod.main.MudrodConstants;
+
 /**
  * ClassName: SessionNode Function: Functions related to a node in a session
  * tree sturcture.
@@ -65,13 +67,13 @@ public class SessionNode {
    * @param time:    request time of node
    * @param seq:     sequence of this node
    */
-  public SessionNode(String request, String logType, String referer, String basicUrl, String time, int seq) {
+  public SessionNode(Properties props, String request, String logType, String referer, String time, int seq) {
     this.logType = logType;
     this.time = time;
     this.seq = seq;
     this.setRequest(request);
-    this.setReferer(referer, basicUrl);
-    this.setKey(request, logType);
+    this.setReferer(referer, props.getProperty(MudrodConstants.BASE_URL));
+    this.setKey(props, request, logType);
   }
 
   /**
@@ -94,7 +96,7 @@ public class SessionNode {
    */
   public void setRequest(String req) {
     this.request = req;
-    if (this.logType.equals("PO.DAAC")) {
+    if (this.logType.equals(MudrodConstants.HTTP_LOG)) {
       this.parseRequest(req);
     }
   }
@@ -156,19 +158,19 @@ public class SessionNode {
    * @param request request url
    * @param logType url type
    */
-  public void setKey(String request, String logType) {
+  public void setKey(Properties props, String request, String logType) {
     this.key = "";
-    String datasetlist = "/datasetlist?";
-    String dataset = "/dataset/";
+    String datasetlist = props.getProperty(MudrodConstants.SEARCH_MARKER);
+    String dataset = props.getProperty(MudrodConstants.VIEW_MARKER);
     if (logType.equals("ftp")) {
       this.key = "ftp";
     } else if (logType.equals("root")) {
       this.key = "root";
     } else {
       if (request.contains(datasetlist)) {
-        this.key = "datasetlist";
+        this.key = MudrodConstants.SEARCH_MARKER;
       } else if (request.contains(dataset) /* || request.contains(granule) */) {
-        this.key = "dataset";
+        this.key = MudrodConstants.VIEW_MARKER;
       }
     }
   }
diff --git a/core/src/main/java/org/apache/sdap/mudrod/weblog/structure/SessionTree.java b/core/src/main/java/org/apache/sdap/mudrod/weblog/structure/session/SessionTree.java
similarity index 95%
rename from core/src/main/java/org/apache/sdap/mudrod/weblog/structure/SessionTree.java
rename to core/src/main/java/org/apache/sdap/mudrod/weblog/structure/session/SessionTree.java
index 7d31129..5531f83 100644
--- a/core/src/main/java/org/apache/sdap/mudrod/weblog/structure/SessionTree.java
+++ b/core/src/main/java/org/apache/sdap/mudrod/weblog/structure/session/SessionTree.java
@@ -11,7 +11,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.sdap.mudrod.weblog.structure;
+package org.apache.sdap.mudrod.weblog.structure.session;
 
 import com.google.gson.Gson;
 import com.google.gson.JsonElement;
@@ -20,7 +20,7 @@ import com.google.gson.JsonObject;
 import org.apache.sdap.mudrod.discoveryengine.MudrodAbstract;
 import org.apache.sdap.mudrod.driver.ESDriver;
 import org.apache.sdap.mudrod.main.MudrodConstants;
-
+import org.apache.sdap.mudrod.weblog.structure.log.RequestUrl;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -68,7 +68,7 @@ public class SessionTree extends MudrodAbstract {
    */
   public SessionTree(Properties props, ESDriver es, SessionNode rootData, String sessionID, String cleanupType) {
     super(props, es, null);
-    root = new SessionNode("root", "root", "", props.getProperty(MudrodConstants.BASE_URL), "", 0);
+    root = new SessionNode(props, "root", "root", "", "", 0);
     tmpnode = root;
     this.sessionID = sessionID;
     this.cleanupType = cleanupType;
@@ -84,7 +84,7 @@ public class SessionTree extends MudrodAbstract {
    */
   public SessionTree(Properties props, ESDriver es, String sessionID, String cleanupType) {
     super(props, es, null);
-    root = new SessionNode("root", "root", "", props.getProperty(MudrodConstants.BASE_URL), "", 0);
+    root = new SessionNode(props, "root", "root", "", "", 0);
     root.setParent(root);
     tmpnode = root;
     this.sessionID = sessionID;
@@ -99,15 +99,15 @@ public class SessionTree extends MudrodAbstract {
    */
   public SessionNode insert(SessionNode node) {
     // begin with datasetlist
-    if (props.getProperty(MudrodConstants.SEARCH_MARKER).equals(node.getKey())) {
+    if (MudrodConstants.SEARCH_MARKER.equals(node.getKey())) {
       this.binsert = true;
     }
     if (!this.binsert) {
       return null;
     }
     // remove unrelated node
-    if (!props.getProperty(MudrodConstants.SEARCH_MARKER).equals(node.getKey()) &&
-            !props.getProperty(MudrodConstants.VIEW_MARKER).equals(node.getKey()) &&
+    if (!MudrodConstants.SEARCH_MARKER.equals(node.getKey()) &&
+            !MudrodConstants.VIEW_MARKER.equals(node.getKey()) &&
             !MudrodConstants.FTP_LOG.equals(node.getKey())) {
       return null;
     }
@@ -125,7 +125,7 @@ public class SessionTree extends MudrodAbstract {
 
     // record insert node
     tmpnode = node;
-    if ("dataset".equals(node.getKey())) {
+    if (MudrodConstants.VIEW_MARKER.equals(node.getKey())) {
       latestDatasetnode = node;
     }
 
@@ -190,7 +190,7 @@ public class SessionTree extends MudrodAbstract {
    *
    * @return {@link ClickStream}
    */
-  public List<ClickStream> getClickStreamList() {
+  public List<ClickStream> getClickStreamList(Properties props) {
 
     List<ClickStream> clickthroughs = new ArrayList<>();
     List<SessionNode> viewnodes = this.getViewNodes(this.root);
@@ -198,7 +198,7 @@ public class SessionTree extends MudrodAbstract {
       SessionNode parent = viewnode.getParent();
       List<SessionNode> children = viewnode.getChildren();
 
-      if (!"datasetlist".equals(parent.getKey())) {
+      if (!MudrodConstants.SEARCH_MARKER.equals(parent.getKey())) {
         continue;
       }
 
@@ -413,7 +413,7 @@ public class SessionTree extends MudrodAbstract {
   private List<SessionNode> getViewNodes(SessionNode node) {
 
     List<SessionNode> viewnodes = new ArrayList<>();
-    if ("dataset".equals(node.getKey())) {
+    if (MudrodConstants.VIEW_MARKER.equals(node.getKey())) {
       viewnodes.add(node);
     }
 
@@ -428,7 +428,7 @@ public class SessionTree extends MudrodAbstract {
   }
 
   private List<SessionNode> getQueryNodes(SessionNode node) {
-    return this.getNodes(node, "datasetlist");
+    return this.getNodes(node, MudrodConstants.SEARCH_MARKER);
   }
 
   private List<SessionNode> getNodes(SessionNode node, String nodeKey) {
diff --git a/core/src/main/resources/config.properties b/core/src/main/resources/config.properties
index 6e2bd54..495d29c 100644
--- a/core/src/main/resources/config.properties
+++ b/core/src/main/resources/config.properties
@@ -29,7 +29,7 @@ mudrod.spark.optimize = repartition
 mudrod.log.index = log
 mudrod.ftp.prefix = FTP.
 mudrod.http.prefix = WWW.
-mudrod.base.url = http://podaac.jpl.nasa.gov
+mudrod.base.url = http://podaac.jpl.nasa.gov/
 mudrod.black.request.list = .js, .css, .jpg, .png, .ico, image_captcha, autocomplete, .gif, /alldata/, /api/, get / http/1.1, .jpeg, /ws/
 mudrod.black.agent.list = crawler, googlebot, bingbot, slurp, yacybot, rogerbot, yandexbot, -, apache-httpclient, java, curl
 mudrod.search.freq = 100
diff --git a/service/src/main/java/org/apache/sdap/mudrod/services/search/SessionDetailResource.java b/service/src/main/java/org/apache/sdap/mudrod/services/search/SessionDetailResource.java
index dc31993..074378d 100644
--- a/service/src/main/java/org/apache/sdap/mudrod/services/search/SessionDetailResource.java
+++ b/service/src/main/java/org/apache/sdap/mudrod/services/search/SessionDetailResource.java
@@ -18,7 +18,7 @@ import com.google.gson.JsonObject;
 
 import org.apache.sdap.mudrod.main.MudrodConstants;
 import org.apache.sdap.mudrod.main.MudrodEngine;
-import org.apache.sdap.mudrod.weblog.structure.Session;
+import org.apache.sdap.mudrod.weblog.structure.session.Session;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 

-- 
To stop receiving notification emails like this one, please contact
[email protected].

Reply via email to