Author: gsingers
Date: Mon Jan 4 16:59:56 2010
New Revision: 895701
URL: http://svn.apache.org/viewvc?rev=895701&view=rev
Log:
SOLR-1302: some slight refactoring for more reusable distance calculations
Modified:
lucene/solr/trunk/src/java/org/apache/solr/search/function/distance/DistanceUtils.java
lucene/solr/trunk/src/java/org/apache/solr/search/function/distance/SquaredEuclideanFunction.java
lucene/solr/trunk/src/java/org/apache/solr/search/function/distance/VectorDistanceFunction.java
Modified:
lucene/solr/trunk/src/java/org/apache/solr/search/function/distance/DistanceUtils.java
URL:
http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/search/function/distance/DistanceUtils.java?rev=895701&r1=895700&r2=895701&view=diff
==============================================================================
---
lucene/solr/trunk/src/java/org/apache/solr/search/function/distance/DistanceUtils.java
(original)
+++
lucene/solr/trunk/src/java/org/apache/solr/search/function/distance/DistanceUtils.java
Mon Jan 4 16:59:56 2010
@@ -28,6 +28,66 @@
public static final double RADIANS_TO_DEGREES = 180.0 / Math.PI;
/**
+   * Computes the p-norm (i.e. length) between two vectors, deriving <code>1 / power</code> on the fly.
+   * <p/>
+   * This is a convenience overload: it performs the single division and hands all of the work to
+   * {@link #vectorDistance(double[], double[], double, double)}.
+   * <p/>
+   * See http://en.wikipedia.org/wiki/Lp_space
+   *
+   * @param vec1  The first vector
+   * @param vec2  The second vector
+   * @param power The power (2 for Euclidean distance, 1 for manhattan, etc.)
+   * @return The length.
+   * @see #vectorDistance(double[], double[], double, double)
+   */
+  public static double vectorDistance(double[] vec1, double[] vec2, double power) {
+    final double oneOverPower = 1.0 / power;
+    return vectorDistance(vec1, vec2, power, oneOverPower);
+  }
+
+  /**
+   * Calculate the p-norm (i.e. length) of the difference between two vectors.
+   * <p/>
+   * The value of <code>power</code> selects the computation:
+   * <ul>
+   * <li>0: the number of positions at which the two vectors differ</li>
+   * <li>1: the Manhattan distance, i.e. the sum of the absolute differences</li>
+   * <li>2: the Euclidean distance</li>
+   * <li>Integer.MAX_VALUE or infinite: the infinity norm, i.e. the largest absolute difference</li>
+   * <li>anything else: <code>(sum of |vec1[i] - vec2[i]|^power)^oneOverPower</code></li>
+   * </ul>
+   * Positions are read up to <code>vec1.length</code>, so vec2 must be at least as long as vec1.
+   *
+   * @param vec1         The first vector
+   * @param vec2         The second vector
+   * @param power        The power (2 for Euclidean distance, 1 for manhattan, etc.)
+   * @param oneOverPower If you've precalculated oneOverPower and cached it, use this method to save one
+   *                     division operation over {@link #vectorDistance(double[], double[], double)}.
+   *                     Only consulted when power is not one of the special cases listed above.
+   * @return The length.
+   */
+  public static double vectorDistance(double[] vec1, double[] vec2, double power, double oneOverPower) {
+    double result = 0;
+
+    if (power == 0) {
+      //count the positions where the two vectors differ
+      for (int i = 0; i < vec1.length; i++) {
+        result += vec1[i] - vec2[i] == 0 ? 0 : 1;
+      }
+    } else if (power == 1.0) {
+      //absolute value, otherwise differences of opposite sign cancel each other out
+      for (int i = 0; i < vec1.length; i++) {
+        result += Math.abs(vec1[i] - vec2[i]);
+      }
+    } else if (power == 2.0) {
+      result = Math.sqrt(squaredEuclideanDistance(vec1, vec2));
+    } else if (power == Integer.MAX_VALUE || Double.isInfinite(power)) {//infinite norm
+      //keep the running maximum of the absolute differences across all positions
+      for (int i = 0; i < vec1.length; i++) {
+        result = Math.max(result, Math.abs(vec1[i] - vec2[i]));
+      }
+    } else {
+      //Math.pow yields NaN for a negative base with a non-integer exponent and keeps the sign for
+      //odd exponents, so the norm has to be built from absolute differences
+      for (int i = 0; i < vec1.length; i++) {
+        result += Math.pow(Math.abs(vec1[i] - vec2[i]), power);
+      }
+      result = Math.pow(result, oneOverPower);
+    }
+    return result;
+  }
+
+  /**
+   * Sums the squared component-wise differences of two vectors. No square root is taken, so the
+   * result is the square of the Euclidean distance.
+   * <p/>
+   * Positions are read up to <code>vec1.length</code>, so vec2 must be at least as long as vec1.
+   *
+   * @param vec1 The first vector
+   * @param vec2 The second vector
+   * @return The sum of the squared differences
+   */
+  public static double squaredEuclideanDistance(double[] vec1, double[] vec2) {
+    double sumOfSquares = 0;
+    int pos = 0;
+    while (pos < vec1.length) {
+      final double diff = vec1[pos] - vec2[pos];
+      sumOfSquares += diff * diff;
+      pos++;
+    }
+    return sumOfSquares;
+  }
+
+ /**
* @param x1 The x coordinate of the first point
* @param y1 The y coordinate of the first point
* @param x2 The x coordinate of the second point
@@ -92,6 +152,46 @@
}
/**
+   * Given a string containing <i>dimension</i> values encoded in it, separated by commas, return a double array of
+   * length <i>dimension</i> containing the values. Spaces surrounding a value are skipped.
+   * <p/>
+   * Only the first <i>dimension</i> values are read; anything after them is ignored.
+   *
+   * @param out         A preallocated array. Must be size dimension. If it is not it will be resized.
+   * @param externalVal The value to parse
+   * @param dimension   The expected number of values for the point
+   * @return An array of the values that make up the point (aka vector)
+   * @throws SolrException         if externalVal contains fewer values than the dimension specified.
+   * @throws NumberFormatException if one of the values cannot be parsed as a double.
+   */
+  public static double[] parsePointDouble(double[] out, String externalVal, int dimension) {
+    if (out == null || out.length != dimension) out = new double[dimension];
+    int idx = externalVal.indexOf(',');
+    int i = 0;
+    if (idx == -1 && dimension == 1 && externalVal.length() > 0) {//we have a single point, dimension better be 1
+      out[0] = Double.parseDouble(externalVal.trim());
+      i = 1;
+    } else if (idx > 0) {//if it is zero, that is an error
+      //Parse out a comma separated list of point values, as in: 73.5,89.2,7773.4
+      //[start, end) brackets the current token: end sits on the comma that terminates it, or on
+      //externalVal.length() when it is the last token
+      int start = 0;
+      int end = idx;
+      while (i < dimension) {
+        //TODO: abstract common code with other parsePoint
+        int tokStart = start;
+        int tokEnd = end;
+        while (tokStart < tokEnd && externalVal.charAt(tokStart) == ' ') tokStart++;
+        while (tokEnd > tokStart && externalVal.charAt(tokEnd - 1) == ' ') tokEnd--;
+        out[i] = Double.parseDouble(externalVal.substring(tokStart, tokEnd));
+        i++;
+        if (end == externalVal.length()) {
+          break;//that was the last token; a shortfall is reported by the check below
+        }
+        //move on from the comma that ended this token, not from the first comma in the string,
+        //otherwise every value after the second one re-reads the second token
+        start = end + 1;
+        end = externalVal.indexOf(',', start);
+        if (end == -1) {
+          end = externalVal.length();
+        }
+      }
+    }
+    if (i != dimension) {
+      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "incompatible dimension (" + dimension +
+              ") and values (" + externalVal + "). Only " + i + " values specified");
+    }
+    return out;
+  }
+
+ /**
* extract (by calling {@link #parsePoint(String[], String, int)} and validate the latitude and longitude contained
* in the String by making sure the latitude is between 90 & -90 and longitude is between -180 and 180.
* <p/>
@@ -105,7 +205,7 @@
if (latLon == null) {
latLon = new double[2];
}
- String[] toks = DistanceUtils.parsePoint(null, latLonStr, 2);
+ double[] toks = DistanceUtils.parsePointDouble(null, latLonStr, 2);
latLon[0] = Double.valueOf(toks[0]);
if (latLon[0] < -90.0 || latLon[0] > 90.0) {
Modified:
lucene/solr/trunk/src/java/org/apache/solr/search/function/distance/SquaredEuclideanFunction.java
URL:
http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/search/function/distance/SquaredEuclideanFunction.java?rev=895701&r1=895700&r2=895701&view=diff
==============================================================================
---
lucene/solr/trunk/src/java/org/apache/solr/search/function/distance/SquaredEuclideanFunction.java
(original)
+++
lucene/solr/trunk/src/java/org/apache/solr/search/function/distance/SquaredEuclideanFunction.java
Mon Jan 4 16:59:56 2010
@@ -23,8 +23,7 @@
/**
* While not strictly a distance, the Sq. Euclidean Distance is often all that
is needed in many applications
* that require a distance, thus saving a sq. rt. calculation
- *
- **/
+ */
public class SquaredEuclideanFunction extends VectorDistanceFunction {
protected String name = "sqedist";
@@ -42,16 +41,13 @@
* @param doc The doc to score
*/
protected double distance(int doc, DocValues dv1, DocValues dv2) {
- double result = 0;
- double [] vals1 = new double[source1.dimension()];
- double [] vals2 = new double[source1.dimension()];
+
+ double[] vals1 = new double[source1.dimension()];
+ double[] vals2 = new double[source1.dimension()];
dv1.doubleVal(doc, vals1);
dv2.doubleVal(doc, vals2);
- for (int i = 0; i < vals1.length; i++) {
- double v = vals1[i] - vals2[i];
- result += v * v;
- }
- return result;
+
+ return DistanceUtils.squaredEuclideanDistance(vals1, vals2);
}
@Override
Modified:
lucene/solr/trunk/src/java/org/apache/solr/search/function/distance/VectorDistanceFunction.java
URL:
http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/search/function/distance/VectorDistanceFunction.java?rev=895701&r1=895700&r2=895701&view=diff
==============================================================================
---
lucene/solr/trunk/src/java/org/apache/solr/search/function/distance/VectorDistanceFunction.java
(original)
+++
lucene/solr/trunk/src/java/org/apache/solr/search/function/distance/VectorDistanceFunction.java
Mon Jan 4 16:59:56 2010
@@ -20,8 +20,8 @@
import org.apache.lucene.search.Searcher;
import org.apache.solr.common.SolrException;
import org.apache.solr.search.function.DocValues;
-import org.apache.solr.search.function.ValueSource;
import org.apache.solr.search.function.MultiValueSource;
+import org.apache.solr.search.function.ValueSource;
import java.io.IOException;
import java.util.Map;
@@ -62,45 +62,18 @@
/**
* Calculate the distance
*
- * @param doc The current doc
+ * @param doc The current doc
* @param dv1 The values from the first MultiValueSource
* @param dv2 The values from the second MultiValueSource
* @return The distance
*/
protected double distance(int doc, DocValues dv1, DocValues dv2) {
- double result = 0;
//Handle some special cases:
- double [] vals1 = new double[source1.dimension()];
- double [] vals2 = new double[source1.dimension()];
+ double[] vals1 = new double[source1.dimension()];
+ double[] vals2 = new double[source1.dimension()];
dv1.doubleVal(doc, vals1);
dv2.doubleVal(doc, vals2);
- if (power == 0) {
- for (int i = 0; i < vals1.length; i++) {
- result += vals1[i] - vals2[i] == 0 ? 0 :1;
- }
-
- } else if (power == 1.0) {
- for (int i = 0; i < vals1.length; i++) {
- result += vals1[i] - vals2[i];
- }
- } else if (power == 2.0) {
- for (int i = 0; i < vals1.length; i++) {
- double v = vals1[i] - vals2[i];
- result += v * v;
- }
- result = Math.sqrt(result);
- } else if (power == Integer.MAX_VALUE || Double.isInfinite(power))
{//infininte norm?
- for (int i = 0; i < vals1.length; i++) {
- result = Math.max(vals1[i], vals2[i]);
- }
- } else {
- for (int i = 0; i < vals1.length; i++) {
- result += Math.pow(vals1[i] - vals2[i], power);
- }
- result = Math.pow(result, oneOverPower);
- }
-
- return result;
+ return DistanceUtils.vectorDistance(vals1, vals2, power, oneOverPower);
}
@Override
@@ -111,7 +84,6 @@
final DocValues vals2 = source2.getValues(context, reader);
-
return new DocValues() {
@Override
public byte byteVal(int doc) {
@@ -120,7 +92,7 @@
@Override
public short shortVal(int doc) {
- return (short)doubleVal(doc);
+ return (short) doubleVal(doc);
}
public float floatVal(int doc) {