Taewoo Kim has uploaded a new change for review.
https://asterix-gerrit.ics.uci.edu/1481
Change subject: ASTERIXDB-1778: optimize the edit-distance-check function
......................................................................
ASTERIXDB-1778: optimize the edit-distance-check function
- Only calculate 2 * (threshold + 1) cells, rather than all cells per row.
- Terminate the calculation steps early when it becomes obvious that
the possible edit-distance value is greater than the given threshold.
There is no reason to compute all cells in the 2*2 array.
Change-Id: Ibc8729c4514bb87c347dd7d50358fd897b769977
---
M asterixdb/asterix-doc/src/main/markdown/builtins/5_similarity.md
M
asterixdb/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/similarity/IGenericSimilarityMetric.java
M
asterixdb/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/similarity/SimilarityMetric.java
M
asterixdb/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/similarity/SimilarityMetricEditDistance.java
M
asterixdb/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/similarity/SimilarityMetricJaccard.java
M
asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/EditDistanceCheckEvaluator.java
M
asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/EditDistanceEvaluator.java
M
asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/SimilarityJaccardSortedEvaluator.java
8 files changed, 173 insertions(+), 117 deletions(-)
git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb
refs/changes/81/1481/1
diff --git a/asterixdb/asterix-doc/src/main/markdown/builtins/5_similarity.md
b/asterixdb/asterix-doc/src/main/markdown/builtins/5_similarity.md
index 89ef0f7..cb3318f 100644
--- a/asterixdb/asterix-doc/src/main/markdown/builtins/5_similarity.md
+++ b/asterixdb/asterix-doc/src/main/markdown/builtins/5_similarity.md
@@ -47,6 +47,36 @@
2
+### edit_distance_check ###
+* Syntax:
+
+ edit_distance_check(expression1, expression2, threshold)
+
+* Checks whether the edit distance of `expression1` and `expression2` is
within a given threshold.
+
+* Arguments:
+ * `expression1` : a `string` or a homogeneous `array` of a comparable item
type.
+ * `expression2` : The same type as `expression1`.
+ * `threshold` : a `bigint` that represents the distance threshold.
+* Return Value:
+ * an `array` with two items:
+ * The first item contains a `boolean` value representing whether the
edit distance of `expression1` and `expression2` is within the given threshold.
+ * The second item contains an `integer` that represents the edit
distance of `expression1` and `expression2` if the first item is true.
+ * If the first item is false, then the second item is set to
2147483647.
+ * `missing` if any argument is a `missing` value,
+ * `null` if any argument is a `null` value but no argument is a `missing`
value,
+ * a type error will be raised if:
+ * the first or second argument is any other non-string value,
+ * or, the third argument is any other non-bigint value.
+* Note: an [n_gram
index](similarity.html#UsingIndexesToSupportSimilarityQueries) can be utilized
for this function.
+* Example:
+
+ edit_distance_check("happy","hapr",2);
+
+
+* The expected result is:
+
+ [ true, 2 ]
### edit_distance_contains ###
* Syntax:
diff --git
a/asterixdb/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/similarity/IGenericSimilarityMetric.java
b/asterixdb/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/similarity/IGenericSimilarityMetric.java
index ac4a3dd..751597d 100644
---
a/asterixdb/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/similarity/IGenericSimilarityMetric.java
+++
b/asterixdb/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/similarity/IGenericSimilarityMetric.java
@@ -22,8 +22,11 @@
import org.apache.hyracks.api.exceptions.HyracksDataException;
public interface IGenericSimilarityMetric {
- // returns similarity
- public float getSimilarity(IListIterator firstList, IListIterator
secondList) throws HyracksDataException;
+ // Returns -1 if this method supports early-termination and it becomes
obvious that
+ // the possible similarity value can't satisfy the given simThresh value.
+ // Else returns the calculated similarity value.
+ public float getActualSimilarityVal(IListIterator firstList, IListIterator
secondList, float simThresh)
+ throws HyracksDataException;
// returns -1 if does not satisfy threshold
// else returns similarity
diff --git
a/asterixdb/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/similarity/SimilarityMetric.java
b/asterixdb/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/similarity/SimilarityMetric.java
index d36d60d..70029a3 100644
---
a/asterixdb/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/similarity/SimilarityMetric.java
+++
b/asterixdb/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/similarity/SimilarityMetric.java
@@ -64,23 +64,6 @@
}
public static int getIntersectSize(int[] tokensX, int startX, int[]
tokensY, int startY) {
- // int intersectSize = 0;
- //
- // while (startX < tokensX.length && startY < tokensY.length) {
- // int tokenX = tokensX[startX];
- // int tokenY = tokensY[startY];
- // if (tokenX > tokenY) {
- // startY++;
- // } else if (tokenX < tokenY) {
- // startX++;
- // } else {
- // intersectSize++;
- // startX++;
- // startY++;
- // }
- // }
- //
- // return intersectSize;
return getIntersectSize(tokensX, startX, tokensX.length, tokensY,
startY, tokensY.length);
}
@@ -129,52 +112,6 @@
public static PartialIntersect getPartialIntersectSize(int[] tokensX,
int[] tokensY, int tokenStop) {
return getPartialIntersectSize(tokensX, 0, tokensX.length, tokensY, 0,
tokensY.length, tokenStop);
- }
-
- // @SuppressWarnings("unchecked")
- // public static int getIntersectSize(DataBag tokensX, DataBag tokensY) {
- // int intersectSize = 0;
- //
- // Iterator<Tuple> iteratorX = tokensX.iterator();
- // Iterator<Tuple> iteratorY = tokensY.iterator();
- //
- // Tuple nextX = null;
- // Tuple nextY = null;
- //
- // while ((nextX != null || iteratorX.hasNext())
- // && (nextY != null || iteratorY.hasNext())) {
- // if (nextX == null) {
- // nextX = iteratorX.next();
- // }
- // if (nextY == null) {
- // nextY = iteratorY.next();
- // }
- //
- // int cmp = nextX.compareTo(nextY);
- // if (cmp > 0) {
- // nextY = null;
- // } else if (cmp < 0) {
- // nextX = null;
- // } else {
- // intersectSize++;
- // nextX = null;
- // nextY = null;
- // }
- // }
- //
- // return intersectSize;
- // }
-
- // public abstract float getSimilarity(DataBag tokensX, DataBag tokensY);
-
- // public abstract float getSimilarity(DataBag tokensX, int lengthX,
- // DataBag tokensY, int lengthY);
-
- public float getSimilarity(IListIterator tokensX, IListIterator tokensY)
throws HyracksDataException {
- int intersectionSize = SimilarityMetric.getIntersectSize(tokensX,
tokensY);
- int totalSize = tokensX.size() + tokensY.size();
-
- return (float) intersectionSize / (totalSize - intersectionSize);
}
public abstract float getSimilarity(int[] tokensX, int startX, int
lengthX, int[] tokensY, int startY, int lengthY);
diff --git
a/asterixdb/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/similarity/SimilarityMetricEditDistance.java
b/asterixdb/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/similarity/SimilarityMetricEditDistance.java
index 9dce89e..ba0453a 100644
---
a/asterixdb/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/similarity/SimilarityMetricEditDistance.java
+++
b/asterixdb/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/similarity/SimilarityMetricEditDistance.java
@@ -26,32 +26,50 @@
public class SimilarityMetricEditDistance implements IGenericSimilarityMetric {
- // dp implementation only needs 2 rows
+ // This Dynamic Programming implementation only needs 2 rows.
private final int rows = 2;
private int cols;
private int[][] matrix;
- // for letter count filtering
- private final int[] fsLcCount = new int[128];
- private final int[] ssLcCount = new int[128];
+ // for ASCII letter count filtering
+ private final int[] letterCounts = new int[128];
public SimilarityMetricEditDistance() {
cols = 100; // arbitrary default value
matrix = new int[rows][cols];
}
+ /**
+ * Gets the edit distance value for the given two lists using a Dynamic
Programming approach.
+ * If a non-negative simThresh value is provided, this method only calculates
2 * (simThresh + 1) cells per row,
+ * rather than all cells, as an optimization. Refer to
https://en.wikipedia.org/wiki/Wagner–Fischer_algorithm
+ * for more details. Also, as one more optimization, during the
calculation steps, if this method finds out
+ * that the final edit distance value must be greater than simThresh, this
method stops the calculation
+ * and immediately returns -1.
+ * If the final edit distance value is less than or equal to simThresh,
then that value will be returned.
+ * If a negative simThresh is given, then it calculates all cells and
rows and returns
+ * the final edit distance value.
+ *
+ * @return the edit distance of the two lists. -1 if a non-negative simThresh
value is given and the edit distance
+ * value is greater than the given simThresh.
+ */
@Override
- public float getSimilarity(IListIterator firstList, IListIterator
secondList) throws HyracksDataException {
+ public float getActualSimilarityVal(IListIterator firstList, IListIterator
secondList, float simThresh)
+ throws HyracksDataException {
int flLen = firstList.size();
int slLen = secondList.size();
- // reuse existing matrix if possible
+ // When a non-negative threshold is given, we can apply two
optimizations.
+ int edThresh = (int) simThresh;
+ boolean canTerminateEarly = edThresh >= 0 ? true : false;
+
+ // Reuses the existing matrix if possible.
if (slLen >= cols) {
cols = slLen + 1;
matrix = new int[rows][cols];
}
- // init matrix
+ // Inits the matrix.
for (int i = 0; i <= slLen; i++) {
matrix[0][i] = i;
}
@@ -59,19 +77,53 @@
int currRow = 1;
int prevRow = 0;
- // expand dynamic programming matrix row by row
+ int from = 1;
+ int to = slLen;
+ int minDistance = -1;
+
+ // Expands the dynamic programming matrix row by row.
for (int i = 1; i <= flLen; i++) {
matrix[currRow][0] = i;
secondList.reset();
- for (int j = 1; j <= slLen; j++) {
+
+ // Only calculates 2 * (simThresh + 1) cells per row as an
optimization.
+ // Also keeps minDistance to see whether the possible edit
distance after
+ // each row calculation is greater than the simThresh.
+ if (canTerminateEarly) {
+ minDistance = edThresh + 1;
+ from = Math.max(i - edThresh - 1, 1);
+ to = Math.min(i + edThresh + 1, slLen);
+ for (int j = 1; j < from; j++) {
+ // Moves the pointer of the second list to the point where
the calculation starts for this row.
+ secondList.next();
+ }
+ if (from > 1) {
+ // Sets the left Boundary cell value to make sure that the
calculation is correct.
+ matrix[currRow][from - 1] = edThresh + 1;
+ }
+ if (to < slLen) {
+ // Sets the right Boundary cell value to make sure that
the calculation is correct.
+ matrix[currRow][to + 1] = edThresh + 1;
+ }
+ }
+
+ for (int j = from; j <= to; j++) {
matrix[currRow][j] = Math.min(Math.min(matrix[prevRow][j] + 1,
matrix[currRow][j - 1] + 1),
matrix[prevRow][j - 1] +
(firstList.compare(secondList) == 0 ? 0 : 1));
+ // Replaces minDistance after each cell computation if we find
a smaller value than that.
+ if (canTerminateEarly && matrix[currRow][j] < minDistance) {
+ minDistance = matrix[currRow][j];
+ }
+
secondList.next();
}
-
+ // If the minimum distance value is greater than the given
threshold, no reason to process next row.
+ if (canTerminateEarly && minDistance > edThresh) {
+ return -1;
+ }
firstList.next();
int tmp = currRow;
@@ -82,6 +134,9 @@
return matrix[prevRow][slLen];
}
+ /**
+ * Gets the edit distance value for the given two lists.
+ */
@Override
public float getSimilarity(IListIterator firstList, IListIterator
secondList, float simThresh)
throws HyracksDataException {
@@ -96,8 +151,8 @@
return -1;
}
- float ed = getSimilarity(firstList, secondList);
- if (ed > edThresh) {
+ float ed = getActualSimilarityVal(firstList, secondList, simThresh);
+ if (ed > edThresh || ed < 0) {
return -1;
} else {
return ed;
@@ -155,7 +210,8 @@
}
// faster implementation for common case of string edit distance
- public int UTF8StringEditDistance(byte[] leftBytes, int fsStart, byte[]
rightBytes, int ssStart) {
+ public int getActualUTF8StringEditDistanceVal(byte[] leftBytes, int
fsStart, byte[] rightBytes, int ssStart,
+ int edThresh) {
int fsLen = UTF8StringUtil.getStringLength(leftBytes, fsStart);
int ssLen = UTF8StringUtil.getStringLength(rightBytes, ssStart);
@@ -164,7 +220,10 @@
int fsMetaLen = UTF8StringUtil.getNumBytesToStoreLength(fsUtfLen);
int ssMetaLen = UTF8StringUtil.getNumBytesToStoreLength(ssUtfLen);
- // reuse existing matrix if possible
+ // When a non-negative threshold is given, we can apply two
optimizations.
+ boolean canTerminateEarly = edThresh >= 0 ? true : false;
+
+ // Reuses the existing matrix if possible.
if (ssLen >= cols) {
cols = ssLen + 1;
matrix = new int[rows][cols];
@@ -173,7 +232,7 @@
int fsDataStart = fsStart + fsMetaLen;
int ssDataStart = ssStart + ssMetaLen;
- // init matrix
+ // Inits the matrix
for (int i = 0; i <= ssLen; i++) {
matrix[0][i] = i;
}
@@ -181,19 +240,55 @@
int currRow = 1;
int prevRow = 0;
- // expand dynamic programming matrix row by row
+ int from = 1;
+ int to = ssLen;
+ int minDistance = -1;
+
+ // Expands the dynamic programming matrix row by row.
int fsPos = fsDataStart;
for (int i = 1; i <= fsLen; i++) {
matrix[currRow][0] = i;
char fsChar =
Character.toLowerCase(UTF8StringUtil.charAt(leftBytes, fsPos));
int ssPos = ssDataStart;
- for (int j = 1; j <= ssLen; j++) {
+
+ // Only calculates 2 * (simThresh + 1) cells per row as an
optimization.
+ // Also keeps minDistance to see whether the possible edit
distance after
+ // each row calculation is greater than the simThresh.
+ if (canTerminateEarly) {
+ minDistance = edThresh + 1;
+ from = Math.max(i - edThresh - 1, 1);
+ to = Math.min(i + edThresh + 1, ssLen);
+ for (int j = 1; j < from; j++) {
+ // Moves the pointer of the second list to the point where
the calculation starts for this row.
+ ssPos += UTF8StringUtil.charSize(rightBytes, ssPos);
+ }
+ if (from > 1) {
+ // Sets the left Boundary cell value to make sure that the
calculation is correct.
+ matrix[currRow][from - 1] = edThresh + 1;
+ }
+ if (to < ssLen) {
+ // Sets the right Boundary cell value to make sure that
the calculation is correct.
+ matrix[currRow][to + 1] = edThresh + 1;
+ }
+ }
+
+ for (int j = from; j <= to; j++) {
char ssChar =
Character.toLowerCase(UTF8StringUtil.charAt(rightBytes, ssPos));
matrix[currRow][j] = Math.min(Math.min(matrix[prevRow][j] + 1,
matrix[currRow][j - 1] + 1),
matrix[prevRow][j - 1] + (fsChar == ssChar ? 0 : 1));
+ // Replaces minDistance after each cell computation if we find
a smaller value than that.
+ if (canTerminateEarly && matrix[currRow][j] < minDistance) {
+ minDistance = matrix[currRow][j];
+ }
+
ssPos += UTF8StringUtil.charSize(rightBytes, ssPos);
+ }
+
+ // If the minimum distance value is greater than the given
threshold, no reason to process next row.
+ if (canTerminateEarly && minDistance > edThresh) {
+ return -1;
}
fsPos += UTF8StringUtil.charSize(leftBytes, fsPos);
int tmp = currRow;
@@ -218,8 +313,7 @@
}
// initialize letter count filtering
- Arrays.fill(fsLcCount, 0);
- Arrays.fill(ssLcCount, 0);
+ Arrays.fill(letterCounts, 0);
// compute letter counts for first string
int fsPos = fsStart + fsMetaLen;
@@ -227,7 +321,7 @@
while (fsPos < fsEnd) {
char c = Character.toLowerCase(UTF8StringUtil.charAt(bytesLeft,
fsPos));
if (c < 128) {
- fsLcCount[c]++;
+ letterCounts[c]++;
}
fsPos += UTF8StringUtil.charSize(bytesLeft, fsPos);
}
@@ -238,30 +332,30 @@
while (ssPos < ssEnd) {
char c = Character.toLowerCase(UTF8StringUtil.charAt(bytesRight,
ssPos));
if (c < 128) {
- ssLcCount[c]++;
+ letterCounts[c]--;
}
ssPos += UTF8StringUtil.charSize(bytesRight, ssPos);
}
// apply filter
- int gtSum = 0;
- int ltSum = 0;
+ int secondTofirstDiffSum = 0;
+ int firstToSecondDiffSum = 0;
for (int i = 0; i < 128; i++) {
- if (fsLcCount[i] > ssLcCount[i]) {
- gtSum += fsLcCount[i] - ssLcCount[i];
- if (gtSum > edThresh) {
+ if (letterCounts[i] >= 0) {
+ secondTofirstDiffSum += letterCounts[i];
+ if (secondTofirstDiffSum > edThresh) {
return -1;
}
} else {
- ltSum += ssLcCount[i] - fsLcCount[i];
- if (ltSum > edThresh) {
+ firstToSecondDiffSum += Math.abs(letterCounts[i]);
+ if (firstToSecondDiffSum > edThresh) {
return -1;
}
}
}
- int ed = UTF8StringEditDistance(bytesLeft, fsStart, bytesRight,
ssStart);
- if (ed > edThresh) {
+ int ed = getActualUTF8StringEditDistanceVal(bytesLeft, fsStart,
bytesRight, ssStart, edThresh);
+ if (ed > edThresh || ed < 0) {
return -1;
} else {
return ed;
diff --git
a/asterixdb/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/similarity/SimilarityMetricJaccard.java
b/asterixdb/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/similarity/SimilarityMetricJaccard.java
index f4162c7..cafc7fb 100644
---
a/asterixdb/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/similarity/SimilarityMetricJaccard.java
+++
b/asterixdb/asterix-fuzzyjoin/src/main/java/org/apache/asterix/fuzzyjoin/similarity/SimilarityMetricJaccard.java
@@ -44,24 +44,10 @@
return ((float) setX.size()) / (tokensX.length + tokensY.length -
setX.size());
}
- // @Override
- // public float getSimilarity(DataBag tokensX, DataBag tokensY) {
- // return getSimilarity(tokensX, (int) tokensX.size(), tokensY,
- // (int) tokensY.size());
- // }
-
- // @Override
- // public float getSimilarity(DataBag tokensX, int lengthX, DataBag
tokensY,
- // int lengthY) {
- // int intersectionSize = SimilarityMetric.getIntersectSize(tokensX,
- // tokensY);
- // int totalSize = lengthX + lengthY;
- //
- // return (float) intersectionSize / (totalSize - intersectionSize);
- // }
-
+ // SimThresh value will be ignored for this method since it doesn't
provide an early termination.
@Override
- public float getSimilarity(IListIterator tokensX, IListIterator tokensY)
throws HyracksDataException {
+ public float getActualSimilarityVal(IListIterator tokensX, IListIterator
tokensY, float simThresh)
+ throws HyracksDataException {
int intersectionSize = SimilarityMetric.getIntersectSize(tokensX,
tokensY);
int totalSize = tokensX.size() + tokensY.size();
@@ -81,7 +67,7 @@
return -1f;
}
- float jacc = getSimilarity(firstList, secondList);
+ float jacc = getActualSimilarityVal(firstList, secondList, simThresh);
if (jacc < simThresh) {
return -1f;
} else {
diff --git
a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/EditDistanceCheckEvaluator.java
b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/EditDistanceCheckEvaluator.java
index fee34b9..3dd3516 100644
---
a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/EditDistanceCheckEvaluator.java
+++
b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/EditDistanceCheckEvaluator.java
@@ -21,6 +21,8 @@
import java.io.IOException;
import org.apache.asterix.builders.OrderedListBuilder;
+import org.apache.asterix.common.exceptions.ErrorCode;
+import org.apache.asterix.common.exceptions.RuntimeDataException;
import org.apache.asterix.formats.nontagged.SerializerDeserializerProvider;
import org.apache.asterix.om.base.ABoolean;
import org.apache.asterix.om.functions.BuiltinFunctions;
@@ -77,6 +79,10 @@
try {
edThresh =
ATypeHierarchy.getIntegerValue(BuiltinFunctions.EDIT_DISTANCE_CHECK.getName(),
2,
argPtrThreshold.getByteArray(),
argPtrThreshold.getStartOffset());
+ if (edThresh < 0) {
+ throw new RuntimeDataException(ErrorCode.NEGATIVE_VALUE,
BuiltinFunctions.EDIT_DISTANCE_CHECK.getName(),
+ 3, edThresh);
+ }
editDistance = computeResult(argPtr1, argPtr2, firstTypeTag);
writeResult(editDistance);
} catch (IOException e) {
diff --git
a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/EditDistanceEvaluator.java
b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/EditDistanceEvaluator.java
index c9d3731..92f8df3 100644
---
a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/EditDistanceEvaluator.java
+++
b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/EditDistanceEvaluator.java
@@ -105,13 +105,13 @@
switch (argType) {
case STRING: {
- return ed.UTF8StringEditDistance(leftBytes, leftStartOffset +
typeIndicatorSize, rightBytes,
- rightStartOffset + typeIndicatorSize);
+ return ed.getActualUTF8StringEditDistanceVal(leftBytes,
leftStartOffset + typeIndicatorSize, rightBytes,
+ rightStartOffset + typeIndicatorSize, -1);
}
case ORDEREDLIST: {
firstOrdListIter.reset(leftBytes, leftStartOffset);
secondOrdListIter.reset(rightBytes, rightStartOffset);
- return (int) ed.getSimilarity(firstOrdListIter,
secondOrdListIter);
+ return (int) ed.getActualSimilarityVal(firstOrdListIter,
secondOrdListIter, -1);
}
default: {
throw new
TypeMismatchException(BuiltinFunctions.EDIT_DISTANCE, 0, argType.serialize(),
diff --git
a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/SimilarityJaccardSortedEvaluator.java
b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/SimilarityJaccardSortedEvaluator.java
index d40cb67..3a60295 100644
---
a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/SimilarityJaccardSortedEvaluator.java
+++
b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/SimilarityJaccardSortedEvaluator.java
@@ -35,6 +35,6 @@
@Override
protected float computeResult() throws HyracksDataException {
- return jaccard.getSimilarity(firstListIter, secondListIter);
+ return jaccard.getActualSimilarityVal(firstListIter, secondListIter,
-1.0f);
}
}
--
To view, visit https://asterix-gerrit.ics.uci.edu/1481
To unsubscribe, visit https://asterix-gerrit.ics.uci.edu/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: Ibc8729c4514bb87c347dd7d50358fd897b769977
Gerrit-PatchSet: 1
Gerrit-Project: asterixdb
Gerrit-Branch: master
Gerrit-Owner: Taewoo Kim <[email protected]>