[ 
https://issues.apache.org/jira/browse/SDAP-52?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16425808#comment-16425808
 ] 

ASF GitHub Bot commented on SDAP-52:
------------------------------------

lewismc closed pull request #9: SDAP-52 Improve ranking speed
URL: https://github.com/apache/incubator-sdap-mudrod/pull/9
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):

diff --git 
a/core/src/main/java/org/apache/sdap/mudrod/integration/LinkageIntegration.java 
b/core/src/main/java/org/apache/sdap/mudrod/integration/LinkageIntegration.java
index c8829dd..37e7508 100644
--- 
a/core/src/main/java/org/apache/sdap/mudrod/integration/LinkageIntegration.java
+++ 
b/core/src/main/java/org/apache/sdap/mudrod/integration/LinkageIntegration.java
@@ -220,7 +220,7 @@ public void aggregateRelatedTerms(String input, String 
model) {
     SearchResponse usrhis = 
es.getClient().prepareSearch(props.getProperty(INDEX_NAME)).setTypes(model).setQuery(QueryBuilders.termQuery("keywords",
 input)).addSort(WEIGHT, SortOrder.DESC).setSize(11)
         .execute().actionGet();
 
-    LOG.info("\n************************ {} 
results***************************", model);
+    LOG.info("{} results", model);
     for (SearchHit hit : usrhis.getHits().getHits()) {
       Map<String, Object> result = hit.getSource();
       String keywords = (String) result.get("keywords");
diff --git a/core/src/main/java/org/apache/sdap/mudrod/ssearch/Ranker.java 
b/core/src/main/java/org/apache/sdap/mudrod/ssearch/Ranker.java
index 21aa646..af7e6a9 100644
--- a/core/src/main/java/org/apache/sdap/mudrod/ssearch/Ranker.java
+++ b/core/src/main/java/org/apache/sdap/mudrod/ssearch/Ranker.java
@@ -40,16 +40,6 @@ public Ranker(Properties props, ESDriver es, SparkDriver 
spark) {
       le = new Learner(spark, 
props.getProperty(MudrodConstants.RANKING_MODEL));
   }
 
-  /**
-   * Method of comparing results based on final score
-   */
-  public class ResultComparator implements Comparator<SResult> {
-    @Override
-    public int compare(SResult o1, SResult o2) {
-      return o2.below.compareTo(o1.below);
-    }
-  }
-
   /**
    * Method of calculating mean value
    *
@@ -147,42 +137,33 @@ private double getNDForm(double d) {
       }
     }
 
-    // using collection.sort directly would cause an "not transitive" error
-    // this is because the training model is not a overfitting model
-    for (int j = 0; j < resultList.size(); j++) {
-      for (int k = 0; k < resultList.size(); k++) {
-        if (k != j) {
-          resultList.get(j).below += comp(resultList.get(j), 
resultList.get(k));
-        }
-      }
-    }
-
     Collections.sort(resultList, new ResultComparator());
     return resultList;
   }
-
+  
   /**
-   * Method of compare two search resutls
-   *
-   * @param o1 search result 1
-   * @param o2 search result 2
-   * @return 1 if o1 is greater than o2, 0 otherwise
+   * Method of comparing results based on final score
    */
-  public int comp(SResult o1, SResult o2) {
-    List<Double> instList = new ArrayList<>();
-    for (int i = 0; i < SResult.rlist.length; i++) {
-      double o2Score = SResult.get(o2, SResult.rlist[i]);
-      double o1Score = SResult.get(o1, SResult.rlist[i]);
-      instList.add(o2Score - o1Score);
-    }
+  public class ResultComparator implements Comparator<SResult> {
+    @Override
+    /**
+     * @param o1  one item from the search result list
+     * @param o2 another item from the search result list
+     * @return 1 meaning o1>o2, 0 meaning o1=o2
+     */
+    public int compare(SResult o1, SResult o2) {
+      List<Double> instList = new ArrayList<>();
+      for (String str: SResult.rlist) {
+        double o2Score = SResult.get(o2, str);
+        double o1Score = SResult.get(o1, str);
+        instList.add(o2Score - o1Score);
+      }
 
-    double[] ins = instList.stream().mapToDouble(i -> i).toArray();
-    LabeledPoint insPoint = new LabeledPoint(99.0, Vectors.dense(ins));
-    double prediction = le.classify(insPoint);
-    if (equalComp(prediction, 1)) { //different from weka where the return 
value is 1 or 2
-      return 0;
-    } else {
-      return 1;
+      double[] ins = instList.stream().mapToDouble(i -> i).toArray();
+      LabeledPoint insPoint = new LabeledPoint(99.0, Vectors.dense(ins));
+      int prediction = (int)le.classify(insPoint);
+      
+      return prediction;
     }
   }
 
diff --git a/core/src/main/java/org/apache/sdap/mudrod/ssearch/Searcher.java 
b/core/src/main/java/org/apache/sdap/mudrod/ssearch/Searcher.java
index 8791bf4..a4fe686 100644
--- a/core/src/main/java/org/apache/sdap/mudrod/ssearch/Searcher.java
+++ b/core/src/main/java/org/apache/sdap/mudrod/ssearch/Searcher.java
@@ -80,24 +80,6 @@ public Double getPop(Double pop) {
     return pop;
   }
 
-  /**
-   * Method of checking if query exists in a certain attribute
-   *
-   * @param strList attribute value in the form of ArrayList
-   * @param query   query string
-   * @return 1 means query exists, 0 otherwise
-   */
-  public Double exists(ArrayList<String> strList, String query) {
-    Double val = 0.0;
-    if (strList != null) {
-      String str = String.join(", ", strList);
-      if (str != null && str.length() != 0 && 
str.toLowerCase().trim().contains(query)) {
-        val = 1.0;
-      }
-    }
-    return val;
-  }
-
   /**
    * Main method of semantic search
    *
diff --git 
a/core/src/main/java/org/apache/sdap/mudrod/ssearch/structure/SResult.java 
b/core/src/main/java/org/apache/sdap/mudrod/ssearch/structure/SResult.java
index fce4e34..81de2b4 100644
--- a/core/src/main/java/org/apache/sdap/mudrod/ssearch/structure/SResult.java
+++ b/core/src/main/java/org/apache/sdap/mudrod/ssearch/structure/SResult.java
@@ -73,7 +73,6 @@
   public Double prediction = 0.0;
   public String label = null;
 
-  //add by quintinali
   public String startDate;
   public String endDate;
   public String sensors;


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


> Improve ranking speed
> ---------------------
>
>                 Key: SDAP-52
>                 URL: https://issues.apache.org/jira/browse/SDAP-52
>             Project: Apache Science Data Analytics Platform
>          Issue Type: Improvement
>          Components: mudrod
>            Reporter: Yongyao Jiang
>            Assignee: Yongyao Jiang
>            Priority: Minor
>
> The time complexity of the current machine learning algorithm is O(n^2) because 
> the ordering of some items is not transitive, and using Collections.sort directly 
> would cause a "Comparison method violates its general contract" error. It needs 
> to be improved to O(n log n).



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

Reply via email to