lewismc closed pull request #9: SDAP-52 Improve ranking speed — URL: https://github.com/apache/incubator-sdap-mudrod/pull/9
This is a PR merged from a forked repository. Because GitHub hides the original diff of a foreign pull request (one from a fork) once it is merged, the diff is reproduced below for the sake of provenance: diff --git a/core/src/main/java/org/apache/sdap/mudrod/integration/LinkageIntegration.java b/core/src/main/java/org/apache/sdap/mudrod/integration/LinkageIntegration.java index c8829dd..37e7508 100644 --- a/core/src/main/java/org/apache/sdap/mudrod/integration/LinkageIntegration.java +++ b/core/src/main/java/org/apache/sdap/mudrod/integration/LinkageIntegration.java @@ -220,7 +220,7 @@ public void aggregateRelatedTerms(String input, String model) { SearchResponse usrhis = es.getClient().prepareSearch(props.getProperty(INDEX_NAME)).setTypes(model).setQuery(QueryBuilders.termQuery("keywords", input)).addSort(WEIGHT, SortOrder.DESC).setSize(11) .execute().actionGet(); - LOG.info("\n************************ {} results***************************", model); + LOG.info("{} results", model); for (SearchHit hit : usrhis.getHits().getHits()) { Map<String, Object> result = hit.getSource(); String keywords = (String) result.get("keywords"); diff --git a/core/src/main/java/org/apache/sdap/mudrod/ssearch/Ranker.java b/core/src/main/java/org/apache/sdap/mudrod/ssearch/Ranker.java index 21aa646..af7e6a9 100644 --- a/core/src/main/java/org/apache/sdap/mudrod/ssearch/Ranker.java +++ b/core/src/main/java/org/apache/sdap/mudrod/ssearch/Ranker.java @@ -40,16 +40,6 @@ public Ranker(Properties props, ESDriver es, SparkDriver spark) { le = new Learner(spark, props.getProperty(MudrodConstants.RANKING_MODEL)); } - /** - * Method of comparing results based on final score - */ - public class ResultComparator implements Comparator<SResult> { - @Override - public int compare(SResult o1, SResult o2) { - return o2.below.compareTo(o1.below); - } - } - /** * Method of calculating mean value * @@ -147,42 +137,33 @@ private double 
getNDForm(double d) { } } - // using collection.sort directly would cause an "not transitive" error - // this is because the training model is not a overfitting model - for (int j = 0; j < resultList.size(); j++) { - for (int k = 0; k < resultList.size(); k++) { - if (k != j) { - resultList.get(j).below += comp(resultList.get(j), resultList.get(k)); - } - } - } - Collections.sort(resultList, new ResultComparator()); return resultList; } - + /** - * Method of compare two search resutls - * - * @param o1 search result 1 - * @param o2 search result 2 - * @return 1 if o1 is greater than o2, 0 otherwise + * Method of comparing results based on final score */ - public int comp(SResult o1, SResult o2) { - List<Double> instList = new ArrayList<>(); - for (int i = 0; i < SResult.rlist.length; i++) { - double o2Score = SResult.get(o2, SResult.rlist[i]); - double o1Score = SResult.get(o1, SResult.rlist[i]); - instList.add(o2Score - o1Score); - } + public class ResultComparator implements Comparator<SResult> { + @Override + /** + * @param o1 one item from the search result list + * @param o2 another item from the search result list + * @return 1 meaning o1>o2, 0 meaning o1=o2 + */ + public int compare(SResult o1, SResult o2) { + List<Double> instList = new ArrayList<>(); + for (String str: SResult.rlist) { + double o2Score = SResult.get(o2, str); + double o1Score = SResult.get(o1, str); + instList.add(o2Score - o1Score); + } - double[] ins = instList.stream().mapToDouble(i -> i).toArray(); - LabeledPoint insPoint = new LabeledPoint(99.0, Vectors.dense(ins)); - double prediction = le.classify(insPoint); - if (equalComp(prediction, 1)) { //different from weka where the return value is 1 or 2 - return 0; - } else { - return 1; + double[] ins = instList.stream().mapToDouble(i -> i).toArray(); + LabeledPoint insPoint = new LabeledPoint(99.0, Vectors.dense(ins)); + int prediction = (int)le.classify(insPoint); + + return prediction; } } diff --git 
a/core/src/main/java/org/apache/sdap/mudrod/ssearch/Searcher.java b/core/src/main/java/org/apache/sdap/mudrod/ssearch/Searcher.java index 8791bf4..a4fe686 100644 --- a/core/src/main/java/org/apache/sdap/mudrod/ssearch/Searcher.java +++ b/core/src/main/java/org/apache/sdap/mudrod/ssearch/Searcher.java @@ -80,24 +80,6 @@ public Double getPop(Double pop) { return pop; } - /** - * Method of checking if query exists in a certain attribute - * - * @param strList attribute value in the form of ArrayList - * @param query query string - * @return 1 means query exists, 0 otherwise - */ - public Double exists(ArrayList<String> strList, String query) { - Double val = 0.0; - if (strList != null) { - String str = String.join(", ", strList); - if (str != null && str.length() != 0 && str.toLowerCase().trim().contains(query)) { - val = 1.0; - } - } - return val; - } - /** * Main method of semantic search * diff --git a/core/src/main/java/org/apache/sdap/mudrod/ssearch/structure/SResult.java b/core/src/main/java/org/apache/sdap/mudrod/ssearch/structure/SResult.java index fce4e34..81de2b4 100644 --- a/core/src/main/java/org/apache/sdap/mudrod/ssearch/structure/SResult.java +++ b/core/src/main/java/org/apache/sdap/mudrod/ssearch/structure/SResult.java @@ -73,7 +73,6 @@ public Double prediction = 0.0; public String label = null; - //add by quintinali public String startDate; public String endDate; public String sensors; ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services