Author: desruisseaux
Date: Fri Feb 3 06:59:46 2017
New Revision: 1781499
URL: http://svn.apache.org/viewvc?rev=1781499&view=rev
Log:
Implement CoordinateFormat.parse(...) method.
This requires a fix in the way UnitFormat and DefaultFormat detect the end of
the string that they have to parse.
Modified:
sis/branches/JDK8/core/sis-referencing/src/main/java/org/apache/sis/geometry/CoordinateFormat.java
sis/branches/JDK8/core/sis-referencing/src/test/java/org/apache/sis/geometry/CoordinateFormatTest.java
sis/branches/JDK8/core/sis-utility/src/main/java/org/apache/sis/io/DefaultFormat.java
sis/branches/JDK8/core/sis-utility/src/main/java/org/apache/sis/measure/AbstractUnit.java
sis/branches/JDK8/core/sis-utility/src/main/java/org/apache/sis/measure/UnitFormat.java
sis/branches/JDK8/core/sis-utility/src/test/java/org/apache/sis/internal/util/XPathsTest.java
sis/branches/JDK8/core/sis-utility/src/test/java/org/apache/sis/measure/UnitFormatTest.java
Modified:
sis/branches/JDK8/core/sis-referencing/src/main/java/org/apache/sis/geometry/CoordinateFormat.java
URL:
http://svn.apache.org/viewvc/sis/branches/JDK8/core/sis-referencing/src/main/java/org/apache/sis/geometry/CoordinateFormat.java?rev=1781499&r1=1781498&r2=1781499&view=diff
==============================================================================
---
sis/branches/JDK8/core/sis-referencing/src/main/java/org/apache/sis/geometry/CoordinateFormat.java
[UTF-8] (original)
+++
sis/branches/JDK8/core/sis-referencing/src/main/java/org/apache/sis/geometry/CoordinateFormat.java
[UTF-8] Fri Feb 3 06:59:46 2017
@@ -24,6 +24,7 @@ import java.text.DecimalFormat;
import java.text.FieldPosition;
import java.text.ParsePosition;
import java.text.ParseException;
+import java.util.Arrays;
import java.util.Date;
import java.util.Locale;
import java.util.TimeZone;
@@ -92,6 +93,19 @@ public class CoordinateFormat extends Co
private static final long serialVersionUID = 8324486673169133932L;
/**
+ * Maximal number of characters to convert to {@link String} if the text
to parse is not a string instance.
+ * This is an arbitrary limit that may change (or be removed) in any
future SIS version.
+ */
+ private static final int READ_AHEAD_LIMIT = 256;
+
+ /**
+ * Maximal number of dimensions to use when parsing a coordinate without
{@link #defaultCRS}.
+ * This is an arbitrary limit that may change (or be removed) in any
future SIS version.
+ * To avoid this limitation, users are encouraged to specify a default CRS.
+ */
+ private static final int DEFAULT_DIMENSION = 4;
+
+ /**
* The separator between each coordinate values to be formatted.
* The default value is a space.
*/
@@ -567,6 +581,8 @@ public class CoordinateFormat extends Co
public DirectPosition parse(final CharSequence text, final ParsePosition
pos) throws ParseException {
ArgumentChecks.ensureNonNull("text", text);
ArgumentChecks.ensureNonNull("pos", pos);
+ final int start = pos.getIndex();
+ final int length = text.length();
/*
* The NumberFormat, DateFormat and AngleFormat work only on String
values, not on CharSequence.
* If the given text is not a String, we will convert an arbitrarily
small section of the given
@@ -580,9 +596,9 @@ public class CoordinateFormat extends Co
subPos = pos;
asString = (String) text;
} else {
- offset = pos.getIndex();
+ offset = start;
subPos = new ParsePosition(0);
- asString = text.subSequence(offset, Math.min(offset + 256,
text.length())).toString();
+ asString = text.subSequence(start, Math.min(start +
READ_AHEAD_LIMIT, length)).toString();
}
/*
* The Format instances to be used for each ordinate values is
determined by the default CRS.
@@ -591,9 +607,53 @@ public class CoordinateFormat extends Co
if (lastCRS != defaultCRS) {
initialize(defaultCRS);
}
- double[] ordinates = new double[formats.length]; // TODO: null
if no CRS has been specified.
+ final double[] ordinates;
+ Format format;
+ final Format[] formats = this.formats;
+ if (formats != null) {
+ format = null;
+ ordinates = new double[formats.length];
+ } else {
+ format = getFormat(Number.class);
+ ordinates = new double[DEFAULT_DIMENSION];
+ }
+ /*
+ * For each ordinate value except the first one, we need to skip the
separator.
+ * If we do not find the separator, we may consider that we reached
the coordinate
+ * end ahead of time. We currently allow that only for coordinate
without CRS.
+ */
for (int i=0; i < ordinates.length; i++) {
- final Object object = formats[i].parseObject(asString, subPos);
+ if (i != 0) {
+ final int end = subPos.getIndex();
+ int index = offset + end;
+ while (!CharSequences.regionMatches(text, index, separator)) {
+ if (index < length) {
+ final int c = Character.codePointAt(text, index);
+ if (Character.isSpaceChar(c)) {
+ index += Character.charCount(c);
+ continue;
+ }
+ }
+ if (formats == null) {
+ pos.setIndex(index);
+ return new
GeneralDirectPosition(Arrays.copyOf(ordinates, i));
+ }
+ pos.setIndex(start);
+ pos.setErrorIndex(index);
+ throw new LocalizedParseException(getLocale(),
Errors.Keys.UnexpectedCharactersAfter_2,
+ new CharSequence[] {text.subSequence(start, end),
CharSequences.token(text, index)}, index);
+ }
+ subPos.setIndex(index + separator.length() - offset);
+ }
+ /*
+ * At this point 'subPos' is set to the beginning of the next
ordinate to parse in 'asString'.
+ * Parse the value as a number, angle or date, as determined from
the coordinate system axis.
+ */
+ if (formats != null) {
+ format = formats[i];
+ }
+ @SuppressWarnings("null")
+ final Object object = format.parseObject(asString, subPos);
if (object == null) {
/*
* If we failed to parse, build an error message with the type
that was expected for that ordinate.
@@ -609,7 +669,7 @@ public class CoordinateFormat extends Co
case DATE: type = Date.class; break;
}
}
- pos.setIndex(offset);
+ pos.setIndex(start);
if (subPos != pos) {
pos.setErrorIndex(offset + subPos.getErrorIndex());
}
@@ -656,7 +716,7 @@ public class CoordinateFormat extends Co
value = ((Unit<?>)
unit).getConverterToAny(target).convert(value);
} catch (IncommensurableException e) {
index += offset;
- pos.setIndex(offset);
+ pos.setIndex(start);
pos.setErrorIndex(index);
throw (ParseException) new
ParseException(e.getMessage(), index).initCause(e);
}
@@ -674,20 +734,6 @@ public class CoordinateFormat extends Co
value = -value;
}
ordinates[i] = value;
- /*
- * We require the separator to be present before to continue.
- */
- final int index = offset + subPos.getIndex();
- if (!CharSequences.regionMatches(text, index, separator)) {
- if (i+1 == ordinates.length) {
- break;
- }
- pos.setIndex(offset);
- pos.setErrorIndex(index);
- throw new LocalizedParseException(getLocale(),
Errors.Keys.UnexpectedCharactersAfter_2,
- new CharSequence[] {text.subSequence(offset, index),
CharSequences.token(text, index)}, index);
- }
- subPos.setIndex(index + separator.length() - offset);
}
final GeneralDirectPosition position = new
GeneralDirectPosition(ordinates);
position.setCoordinateReferenceSystem(defaultCRS);
Modified:
sis/branches/JDK8/core/sis-referencing/src/test/java/org/apache/sis/geometry/CoordinateFormatTest.java
URL:
http://svn.apache.org/viewvc/sis/branches/JDK8/core/sis-referencing/src/test/java/org/apache/sis/geometry/CoordinateFormatTest.java?rev=1781499&r1=1781498&r2=1781499&view=diff
==============================================================================
---
sis/branches/JDK8/core/sis-referencing/src/test/java/org/apache/sis/geometry/CoordinateFormatTest.java
[UTF-8] (original)
+++
sis/branches/JDK8/core/sis-referencing/src/test/java/org/apache/sis/geometry/CoordinateFormatTest.java
[UTF-8] Fri Feb 3 06:59:46 2017
@@ -21,14 +21,15 @@ import java.util.Locale;
import java.util.TimeZone;
import java.text.ParsePosition;
import java.text.ParseException;
+import org.opengis.geometry.DirectPosition;
import org.apache.sis.measure.Angle;
import org.apache.sis.referencing.crs.HardCodedCRS;
import org.apache.sis.test.mock.VerticalCRSMock;
+import org.apache.sis.test.DependsOnMethod;
import org.apache.sis.test.TestCase;
import org.junit.Test;
import static org.junit.Assert.*;
-import org.opengis.geometry.DirectPosition;
/**
@@ -65,6 +66,40 @@ public final strictfp class CoordinateFo
}
/**
+ * Tests parsing a coordinate in unknown CRS.
+ * The ordinate values are formatted as ordinary numbers.
+ *
+ * @throws ParseException if the parsing failed.
+ */
+ @Test
+ public void testParseUnknownCRS() throws ParseException {
+ final CoordinateFormat format = new CoordinateFormat(null, null);
+ final ParsePosition index = new ParsePosition(0);
+ DirectPosition position = format.parse("23.78 -12.74 127.9 3.25",
index);
+ assertArrayEquals(new double[] {23.78, -12.74, 127.9, 3.25},
position.getCoordinate(), STRICT);
+ assertEquals("ParsePosition.getErrorIndex()", -1,
index.getErrorIndex());
+ assertEquals("ParsePosition.getIndex()", 23, index.getIndex());
+ /*
+ * Try another point having a different number of position
+ * for verifying that no cached values are causing problem.
+ */
+ index.setIndex(0);
+ position = format.parse("4.64 10.25 -3.12", index);
+ assertArrayEquals(new double[] {4.64, 10.25, -3.12},
position.getCoordinate(), STRICT);
+ assertEquals("ParsePosition.getErrorIndex()", -1,
index.getErrorIndex());
+ assertEquals("ParsePosition.getIndex()", 16, index.getIndex());
+ /*
+ * Try again with a different separator.
+ */
+ format.setSeparator("; ");
+ index.setIndex(0);
+ position = format.parse("4.64; 10.25; -3.12", index);
+ assertArrayEquals(new double[] {4.64, 10.25, -3.12},
position.getCoordinate(), STRICT);
+ assertEquals("ParsePosition.getErrorIndex()", -1,
index.getErrorIndex());
+ assertEquals("ParsePosition.getIndex()", 18, index.getIndex());
+ }
+
+ /**
* Tests formatting a single vertical coordinate.
*/
@Test
@@ -85,6 +120,7 @@ public final strictfp class CoordinateFo
* Tests formatting a 4-dimensional geographic coordinate.
*/
@Test
+ @DependsOnMethod("testFormatUnknownCRS")
public void testFormatGeographic4D() {
/*
* For a 4-dimensional coordinate with a temporal CRS.
@@ -124,16 +160,30 @@ public final strictfp class CoordinateFo
*
* @throws ParseException if the parsing failed.
*/
-// @Test
+ @Test
+ @DependsOnMethod("testParseUnknownCRS")
public void testParseGeographic4D() throws ParseException {
final CoordinateFormat format = new CoordinateFormat(Locale.FRANCE,
TimeZone.getTimeZone("GMT+01:00"));
- final String anglePattern = "DD°MM.m′";
- final String datePattern = "dd-MM-yyyy HH:mm";
- final ParsePosition index = new ParsePosition(0);
- format.applyPattern(Angle.class, anglePattern);
- format.applyPattern(Date.class, datePattern);
+ format.applyPattern(Date.class, "dd-MM-yyyy HH:mm");
format.setDefaultCRS(HardCodedCRS.GEOID_4D);
- final DirectPosition pos = format.parse("23°46,8′E 12°44,4′S 127,9 m
22-09-2006 07:00", index);
- assertArrayEquals(new double[] {23.78, -12.74, 127.90, 54000.25},
pos.getCoordinate(), 0.005);
+ final ParsePosition index = new ParsePosition(11);
+ final DirectPosition pos = format.parse("(to skip); 23°46,8′E
12°44,4′S 127,9 m 22-09-2006 07:00 (ignore)", index);
+ assertArrayEquals(new double[] {23.78, -12.74, 127.90, 54000.25},
pos.getCoordinate(), STRICT);
+ assertEquals("ParsePosition.getErrorIndex()", -1,
index.getErrorIndex());
+ assertEquals("ParsePosition.getIndex()", 55, index.getIndex());
+ /*
+ * Tests error message when parsing the same string but with unknown
units of measurement.
+ */
+ index.setIndex(11);
+ try {
+ format.parse("(to skip); 23°46,8′E 12°44,4′S 127,9 Foo 22-09-2006
07:00", index);
+ fail("Should not have parsed a coordinate with unknown units.");
+ } catch (ParseException e) {
+ assertEquals("ParsePosition.getIndex()", 11,
index.getIndex());
+ assertEquals("ParsePosition.getErrorIndex()", 37,
index.getErrorIndex());
+ assertEquals("ParseException.getErrorOffset()", 37,
e.getErrorOffset());
+ assertEquals("Les caractères « Foo » après « 23°46,8′E 12°44,4′S
127,9 » sont inattendus.",
+ e.getLocalizedMessage()); // In the language
specified at CoordinateFormat construction time.
+ }
}
}
Modified:
sis/branches/JDK8/core/sis-utility/src/main/java/org/apache/sis/io/DefaultFormat.java
URL:
http://svn.apache.org/viewvc/sis/branches/JDK8/core/sis-utility/src/main/java/org/apache/sis/io/DefaultFormat.java?rev=1781499&r1=1781498&r2=1781499&view=diff
==============================================================================
---
sis/branches/JDK8/core/sis-utility/src/main/java/org/apache/sis/io/DefaultFormat.java
[UTF-8] (original)
+++
sis/branches/JDK8/core/sis-utility/src/main/java/org/apache/sis/io/DefaultFormat.java
[UTF-8] Fri Feb 3 06:59:46 2017
@@ -39,7 +39,7 @@ import org.apache.sis.internal.util.Loca
*
* @author Martin Desruisseaux (Geomatys)
* @since 0.3
- * @version 0.3
+ * @version 0.8
* @module
*/
@SuppressWarnings("CloneableClassWithoutClone") // Because this class does
not contain field that need to be cloned.
@@ -125,15 +125,34 @@ final class DefaultFormat extends Format
*/
@Override
public Object parseObject(String source, final ParsePosition pos) {
- final int length = source.length();
- final int index = CharSequences.skipLeadingWhitespaces(source,
pos.getIndex(), length);
- source = source.substring(index,
CharSequences.skipTrailingWhitespaces(source, index, length));
+ final int index = CharSequences.skipLeadingWhitespaces(source,
pos.getIndex(), source.length());
+ int end;
+ for (end = index; end < source.length(); end++) {
+ final char c = source.charAt(end);
+ switch (c) {
+ default: {
+ if (c >= '+' && c <= '9') continue;
+ break;
+ /*
+ * ASCII characters in above range are +,-./0123456789
+ * But the , and / characters are excluded by the case
below.
+ */
+ }
+ case ',': case '/': break;
+ case 'E': case 'e': continue;
+ }
+ break;
+ }
+ source = source.substring(index, end);
+ final Object value;
try {
- return valueOf(source);
+ value = valueOf(source);
} catch (NumberFormatException cause) {
pos.setErrorIndex(index);
return null;
}
+ pos.setIndex(end);
+ return value;
}
/**
Modified:
sis/branches/JDK8/core/sis-utility/src/main/java/org/apache/sis/measure/AbstractUnit.java
URL:
http://svn.apache.org/viewvc/sis/branches/JDK8/core/sis-utility/src/main/java/org/apache/sis/measure/AbstractUnit.java?rev=1781499&r1=1781498&r2=1781499&view=diff
==============================================================================
---
sis/branches/JDK8/core/sis-utility/src/main/java/org/apache/sis/measure/AbstractUnit.java
[UTF-8] (original)
+++
sis/branches/JDK8/core/sis-utility/src/main/java/org/apache/sis/measure/AbstractUnit.java
[UTF-8] Fri Feb 3 06:59:46 2017
@@ -308,8 +308,11 @@ abstract class AbstractUnit<Q extends Qu
/**
* Returns {@code true} if the given Unicode code point is a valid
character for a unit symbol.
* Current implementation accepts letters, subscripts and the degree sign,
but the set of legal
- * characters may be expanded in any future SIS version. The most
important goal is to avoid
- * confusion with exponents and to detect where a unit symbol ends.
+ * characters may be expanded in any future SIS version (however it should
never allow spaces).
+ * The goal is to avoid confusion with exponents and to detect where a
unit symbol ends.
+ *
+ * <p>Space characters must be excluded from the set of legal characters
because allowing them
+ * would make it harder for {@link UnitFormat} to detect correctly where a
unit symbol ends.</p>
*
* <p>Note that some units defined in the {@link Units} class break this
rule. In particular,
* some of those units contains superscripts or division sign. But the
hard-coded symbols in
Modified:
sis/branches/JDK8/core/sis-utility/src/main/java/org/apache/sis/measure/UnitFormat.java
URL:
http://svn.apache.org/viewvc/sis/branches/JDK8/core/sis-utility/src/main/java/org/apache/sis/measure/UnitFormat.java?rev=1781499&r1=1781498&r2=1781499&view=diff
==============================================================================
---
sis/branches/JDK8/core/sis-utility/src/main/java/org/apache/sis/measure/UnitFormat.java
[UTF-8] (original)
+++
sis/branches/JDK8/core/sis-utility/src/main/java/org/apache/sis/measure/UnitFormat.java
[UTF-8] Fri Feb 3 06:59:46 2017
@@ -282,7 +282,7 @@ public class UnitFormat extends Format i
* Mapping from long localized and unlocalized names to unit instances.
* This map is used only for parsing and created when first needed.
*
- * @see #nameToUnit()
+ * @see #fromName(String)
*/
private transient volatile Map<String,Unit<?>> nameToUnit;
@@ -291,7 +291,7 @@ public class UnitFormat extends Format i
* if the user create many {@code UnitFormat} instances. Note that we do
not cache {@link #symbolToName} because
* {@link ResourceBundle} already provides its own caching mechanism.
*
- * @see #nameToUnit()
+ * @see #fromName(String)
*/
private static final WeakValueHashMap<Locale, Map<String,Unit<?>>> SHARED
= new WeakValueHashMap<>(Locale.class);
@@ -384,7 +384,8 @@ public class UnitFormat extends Format i
* <div class="section">Restriction on character set</div>
* Current implementation accepts only {@linkplain Character#isLetter(int)
letters},
* {@linkplain Characters#isSubScript(int) subscripts}, {@linkplain
Character#isSpaceChar(int) spaces}
- * (including non-breaking spaces but <strong>not</strong> CR/LF
characters) and the degree sign (°),
+ * (including non-breaking spaces but <strong>not</strong> CR/LF
characters), the degree sign (°) and
+ * a few other characters like underscore,
* but the set of legal characters may be expanded in future Apache SIS
versions.
* However the following restrictions are likely to remain:
*
@@ -405,7 +406,7 @@ public class UnitFormat extends Format i
ArgumentChecks.ensureNonEmpty("label", label);
for (int i=0; i < label.length();) {
final int c = label.codePointAt(i);
- if (!AbstractUnit.isSymbolChar(c) && !Character.isSpaceChar(c)) {
+ if (!AbstractUnit.isSymbolChar(c) && !Character.isSpaceChar(c)) {
// NOT Character.isWhitespace(int)
throw new
IllegalArgumentException(Errors.format(Errors.Keys.IllegalArgumentValue_2,
"label", label));
}
i += Character.charCount(c);
@@ -444,16 +445,36 @@ public class UnitFormat extends Format i
}
/**
- * Returns the mapping from long localized and unlocalized names to unit
instances.
- * This mapping is somewhat the converse of {@link #symbolToName()}, but
includes
+ * Returns the unit instance for the given long (un)localized name.
+ * This method is somewhat the converse of {@link #symbolToName()}, but
recognizes also
* international and American spelling of unit names in addition of
localized names.
* The intend is to recognize "meter" as well as "metre".
*
* <p>While we said that {@code UnitFormat} is not thread safe, we make an
exception for this method
* for allowing the singleton {@link #INSTANCE} to parse symbols in a
multi-threads environment.</p>
*/
- @SuppressWarnings("ReturnOfCollectionOrArrayField")
- private Map<String,Unit<?>> nameToUnit() {
+ @SuppressWarnings("fallthrough")
+ private Unit<?> fromName(String uom) {
+ /*
+ * Before to search in resource bundles, check for degrees units. The
"deg" unit can be both angular
+ * and Celsius degrees. We try to resolve this ambiguity by looking
for the "C" suffix. We perform a
+ * special case for the degrees units because SI symbols are
case-sensitive and unit names in resource
+ * bundles are case-insensitive, but the "deg" case is a mix of both.
+ */
+ if (uom.regionMatches(true, 0, "deg", 0, 3)) {
+ final int length = uom.length();
+ switch (length) {
+ case 3: return Units.DEGREE; // Exactly
"deg" (ignoring case)
+ case 5: final char c = uom.charAt(3);
+ if (c != '_' && !Character.isSpaceChar(c)) break;
+ // else fallthrough
+ case 4: switch (uom.charAt(length - 1)) {
+ case 'K': // Unicode
U+212A
+ case 'K': return Units.KELVIN; // Exactly
"degK" (ignoring case except for 'K')
+ case 'C': return Units.CELSIUS;
+ }
+ }
+ }
Map<String,Unit<?>> map = nameToUnit;
if (map == null) {
map = SHARED.get(locale);
@@ -489,7 +510,17 @@ public class UnitFormat extends Format i
}
nameToUnit = map;
}
- return map;
+ /*
+ * The 'nameToUnit' map contains plural forms (declared in
UnitAliases.properties),
+ * but we make a special case for "degrees", "metres" and "meters"
because they
+ * appear in numerous places.
+ */
+ uom = uom.replace('_', ' ').toLowerCase(locale);
+ uom =
CharSequences.replace(CharSequences.replace(CharSequences.replace(CharSequences.toASCII(uom),
+ "meters", "meter"),
+ "metres", "metre"),
+ "degrees", "degree").toString();
+ return map.get(uom);
}
/**
@@ -775,18 +806,33 @@ public class UnitFormat extends Format i
* Returns {@code true} if the given character is a digit in the sense of
the {@code UnitFormat} parser.
* Note that "digit" is taken here in a much more restrictive way than
{@link Character#isDigit(int)}.
*/
- private static boolean isDigit(final int c) {
+ private static boolean isDigit(final char c) {
return c >= '0' && c <= '9';
}
/**
* Returns {@code true} if the given character is the sign of a number
according the {@code UnitFormat} parser.
*/
- private static boolean isSign(final int c) {
+ private static boolean isSign(final char c) {
return c == '+' || c == '-';
}
/**
+ * Returns {@code true} if the given character sequence contains at least
one digit.
+ * This is a hack for allowing to recognize units like "100 feet" (in
principle not
+ * legal, but seen in practice). This verification has some value if
digits are not
+ * allowed as unit label or symbol.
+ */
+ private static boolean hasDigit(final CharSequence symbol, int lower,
final int upper) {
+ while (lower < upper) {
+ if (isDigit(symbol.charAt(lower++))) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ /**
* Parses the given text as an instance of {@code Unit}.
* If the parse completes without reading the entire length of the text,
an exception is thrown.
*
@@ -810,7 +856,7 @@ public class UnitFormat extends Format i
final ParsePosition position = new ParsePosition(0);
final Unit<?> unit = parse(symbols, position);
final int length = symbols.length();
- final int unrecognized =
CharSequences.skipTrailingWhitespaces(symbols, position.getIndex(), length);
+ final int unrecognized = CharSequences.skipLeadingWhitespaces(symbols,
position.getIndex(), length);
if (unrecognized < length) {
throw new
ParserException(Errors.format(Errors.Keys.UnexpectedCharactersAfter_2,
CharSequences.trimWhitespaces(symbols, 0, unrecognized),
@@ -845,25 +891,27 @@ public class UnitFormat extends Format i
ArgumentChecks.ensureNonNull("position", position);
/*
* Check for authority codes (currently only EPSG, but more could be
added later).
- * If the unit is not an authority code (which is the most common
case), then we
- * will check for hard-coded unit symbols.
- *
- * DefinitionURI.codeOf(…) returns 'uom' directly (provided that
whitespaces were already trimmed)
- * if no ':' character were found, in which case the string is assumed
to be the code directly.
- * This is the intended behavior for AuthorityFactory, but in the
particular case of this method
- * we want to try to parse as a xpointer before to give up.
+ * Example: "urn:ogc:def:uom:EPSG::9001". If the unit is not an
authority code
+ * (which is the most common case), only then we will parse the unit
symbols.
*/
- int start = CharSequences.skipLeadingWhitespaces(symbols,
position.getIndex(), symbols.length());
- int end = XPaths.endOfURI(symbols, start);
- if (end >= 0) {
- final String uom = symbols.subSequence(start, end).toString();
+ int end = symbols.length();
+ int start = CharSequences.skipLeadingWhitespaces(symbols,
position.getIndex(), end);
+ int endOfURI = XPaths.endOfURI(symbols, start);
+ if (endOfURI >= 0) {
+ final String uom = symbols.subSequence(start, endOfURI).toString();
String code = DefinitionURI.codeOf("uom", Constants.EPSG, uom);
- if (code != null && code != uom) { // Really
identity check, see above comment.
+ /*
+ * DefinitionURI.codeOf(…) returns 'uom' directly (provided that
whitespaces were already trimmed)
+ * if no ':' character were found, in which case the string is
assumed to be the code directly.
+ * This is the intended behavior for AuthorityFactory, but in the
particular case of this method
+ * we want to try to parse as a xpointer before to give up.
+ */
+ if (code != null && code != uom) {
NumberFormatException failure = null;
try {
final Unit<?> unit =
Units.valueOfEPSG(Integer.parseInt(code));
if (unit != null) {
- position.setIndex(end);
+ position.setIndex(endOfURI);
return unit;
}
} catch (NumberFormatException e) {
@@ -871,12 +919,28 @@ public class UnitFormat extends Format i
}
throw (ParserException) new
ParserException(Errors.format(Errors.Keys.UnknownUnit_1,
Constants.EPSG + DefaultNameSpace.DEFAULT_SEPARATOR +
code),
- symbols, start + Math.max(0,
uom.indexOf(code))).initCause(failure);
+ symbols, start + Math.max(0,
uom.lastIndexOf(code))).initCause(failure);
}
+ /*
+ * Not an EPSG code. Maybe it is a URI like this example:
+ *
http://schemas.opengis.net/iso/19139/20070417/resources/uom/gmxUom.xml#xpointer(//*[@gml:id='m'])
+ *
+ * If we find such 'uom' value, we could replace 'symbols' by that
'uom'. But it would cause a wrong
+ * error index to be reported in case of parsing failure. We will
rather try to adjust the indices
+ * (and replace 'symbols' only in last resort).
+ */
code = XPaths.xpointer("uom", uom);
if (code != null) {
- symbols = code;
- start = 0;
+ final int base = start;
+ start = endOfURI - code.length();
+ do if (--start < base) { // Should never happen (see
above comment), but we are paranoiac.
+ symbols = code;
+ start = 0;
+ break;
+ } while (!CharSequences.regionMatches(symbols, start, code));
+ end = start + code.length();
+ } else {
+ endOfURI = -1;
}
}
/*
@@ -887,7 +951,7 @@ public class UnitFormat extends Format i
*/
int operation = NOOP; // Enumeration value: IMPLICIT,
MULTIPLY, DIVIDE.
Unit<?> unit = null;
- end = symbols.length();
+ boolean hasSpaces = false;
int i = start;
scan: for (int n; i < end; i += n) {
final int c = Character.codePointAt(symbols, i);
@@ -896,7 +960,7 @@ scan: for (int n; i < end; i += n) {
switch (c) {
/*
* For any character that are is not an operator or
parenthesis, either continue the scanning of
- * character or stop it, depending on whether the character is
valid for a unit symbol or not.
+ * characters or stop it, depending on whether the character
is valid for a unit symbol or not.
* In the later case, we consider that we reached the end of a
unit symbol.
*/
default: {
@@ -906,7 +970,11 @@ scan: for (int n; i < end; i += n) {
}
continue;
}
- if (Character.isSpaceChar(c) || Character.isDigit(c) ||
Characters.isSuperScript(c)) {
+ if (Character.isDigit(c) || Characters.isSuperScript(c)) {
+ continue;
+ }
+ if (Character.isSpaceChar(c)) { //
NOT Character.isWhitespace(int)
+ hasSpaces = true;
continue;
}
break scan;
@@ -972,15 +1040,54 @@ scan: for (int n; i < end; i += n) {
if (operation != IMPLICIT) {
unit = apply(operation, unit, parseSymbol(symbols, start, i));
}
+ hasSpaces = false;
operation = next;
start = i + n;
}
/*
- * At this point we either found an unrecognized character or reached
the end of string. Parse the
- * remaining characters as a unit and apply the pending unit operation
(multiplication or division).
+ * At this point we either found an unrecognized character or reached
the end of string. We will
+ * parse the remaining characters as a unit and apply the pending unit
operation (multiplication
+ * or division). But before, we need to check if the parsing should
stop at the first whitespace.
+ * This verification assumes that spaces are allowed only in labels
specified by the label(…)
+ * method and in resource bundles, not in labels specified by
AbstractUnit.alternate(String).
*/
- unit = apply(operation, unit, parseSymbol(symbols, start, i));
- position.setIndex(i);
+ Unit<?> component = null;
+ if (hasSpaces) {
+ end = i;
+ start = CharSequences.skipLeadingWhitespaces(symbols, start, i);
+search: while ((i = CharSequences.skipTrailingWhitespaces(symbols, start,
i)) > start) {
+ final String uom = symbols.subSequence(start, i).toString();
+ if ((component = labelToUnit.get(uom)) != null) break;
+ if ((component = fromName(uom)) != null) break;
+ int j=i, c;
+ do {
+ c = Character.codePointBefore(symbols, j);
+ j -= Character.charCount(c);
+ if (j <= start) break search;
+ } while (!Character.isWhitespace(c));
+ /*
+ * Really use Character.isWhitespace(c) above, not
Character.isSpaceChar(c), because we want
+ * to exclude non-breaking spaces. This block should be the
only place in UnitFormat class
+ * where we use isWhitespace(c) instead of isSpaceChar(c).
+ */
+ i = j; // Will become the index of first
space after search loop completion.
+ }
+ /*
+ * At this point we did not find any user-specified label or
localized name matching the substring.
+ * Assume that the parsing should stop at the first space, on the
basis that spaces are not allowed
+ * in unit symbols. We make an exception if we detect that the
part before the first space contains
+ * digits (not allowed in unit symbols neither), in which case the
substring may be something like
+ * "100 feet".
+ */
+ if (hasDigit(symbols, start, i)) {
+ i = end; // Restore the full length
(until the first illegal character).
+ }
+ }
+ if (component == null) {
+ component = parseSymbol(symbols, start, i);
+ }
+ unit = apply(operation, unit, component);
+ position.setIndex(endOfURI >= 0 ? endOfURI : i);
return unit;
}
@@ -1017,7 +1124,6 @@ scan: for (int n; i < end; i += n) {
* @return the parsed unit symbol (never {@code null}).
* @throws ParserException if a problem occurred while parsing the given
symbols.
*/
- @SuppressWarnings("fallthrough")
private Unit<?> parseSymbol(final CharSequence symbols, final int lower,
final int upper) throws ParserException {
final String uom = CharSequences.trimWhitespaces(symbols, lower,
upper).toString();
/*
@@ -1109,38 +1215,10 @@ scan: for (int n; i < end; i += n) {
}
}
/*
- * Check for degrees units. Note that "deg" could be both
angular and Celsius degrees.
- * We try to resolve this ambiguity in the code below by
looking for the "C" suffix.
- * We perform a special case for those checks because the
above check for unit symbol
- * is case-sentive, the check for unit name (later) is
case-insensitive, while this
- * check for "deg" is a mix of both.
- */
- if (uom.regionMatches(true, 0, "deg", 0, 3)) {
- switch (length) {
- case 3: return Units.DEGREE; //
Exactly "deg" (ignoring case)
- case 5: final char c = uom.charAt(3);
- if (c != '_' && !Character.isSpaceChar(c))
break;
- // else fallthrough
- case 4: switch (uom.charAt(length - 1)) {
- case 'K': //
Unicode U+212A
- case 'K': return Units.KELVIN; //
Exactly "degK" (ignoring case except for 'K')
- case 'C': return Units.CELSIUS;
- }
- }
- }
- /*
* At this point, we have determined that the label is not a
known unit symbol.
* It may be a unit name, in which case the label is not
case-sensitive anymore.
- * The 'nameToUnit' map contains plural forms (declared in
UnitAliases.properties),
- * but we make a special case for "degrees", "metres" and
"meters" because they
- * appear in numerous places.
*/
- String lc = uom.replace('_', ' ').toLowerCase(locale);
- lc =
CharSequences.replace(CharSequences.replace(CharSequences.replace(CharSequences.toASCII(lc),
- "meters", "meter"),
- "metres", "metre"),
- "degrees", "degree").toString();
- unit = nameToUnit().get(lc);
+ unit = fromName(uom);
if (unit == null) {
if (CharSequences.regionMatches(symbols, lower, UNITY,
true)) {
return Units.UNITY;
Modified:
sis/branches/JDK8/core/sis-utility/src/test/java/org/apache/sis/internal/util/XPathsTest.java
URL:
http://svn.apache.org/viewvc/sis/branches/JDK8/core/sis-utility/src/test/java/org/apache/sis/internal/util/XPathsTest.java?rev=1781499&r1=1781498&r2=1781499&view=diff
==============================================================================
---
sis/branches/JDK8/core/sis-utility/src/test/java/org/apache/sis/internal/util/XPathsTest.java
[UTF-8] (original)
+++
sis/branches/JDK8/core/sis-utility/src/test/java/org/apache/sis/internal/util/XPathsTest.java
[UTF-8] Fri Feb 3 06:59:46 2017
@@ -16,6 +16,7 @@
*/
package org.apache.sis.internal.util;
+import org.apache.sis.util.Characters;
import org.apache.sis.test.TestCase;
import org.junit.Test;
@@ -42,6 +43,7 @@ public final strictfp class XPathsTest e
assertEquals(97,
XPaths.endOfURI("http://schemas.opengis.net/iso/19139/20070417/resources/uom/gmxUom.xml#xpointer(//*[@gml:id='m'])",
0));
assertEquals(-1, XPaths.endOfURI("m/s", 0));
assertEquals(-1, XPaths.endOfURI("m.s", 0));
+ assertEquals(11, XPaths.endOfURI("EPSG" + Characters.NO_BREAK_SPACE +
": 9001", 0));
}
/**
Modified:
sis/branches/JDK8/core/sis-utility/src/test/java/org/apache/sis/measure/UnitFormatTest.java
URL:
http://svn.apache.org/viewvc/sis/branches/JDK8/core/sis-utility/src/test/java/org/apache/sis/measure/UnitFormatTest.java?rev=1781499&r1=1781498&r2=1781499&view=diff
==============================================================================
---
sis/branches/JDK8/core/sis-utility/src/test/java/org/apache/sis/measure/UnitFormatTest.java
[UTF-8] (original)
+++
sis/branches/JDK8/core/sis-utility/src/test/java/org/apache/sis/measure/UnitFormatTest.java
[UTF-8] Fri Feb 3 06:59:46 2017
@@ -19,6 +19,7 @@ package org.apache.sis.measure;
import java.util.Set;
import java.util.HashSet;
import java.util.Locale;
+import java.text.ParsePosition;
import java.lang.reflect.Field;
import javax.measure.Unit;
import javax.measure.format.ParserException;
@@ -281,6 +282,7 @@ public final strictfp class UnitFormatTe
fail("Should not accept unknown unit.");
} catch (ParserException e) {
final String message = e.getMessage();
+ assertTrue(message, message.contains("degree"));
assertTrue(message, message.contains("foo"));
}
// Tests with localisation.
@@ -289,7 +291,8 @@ public final strictfp class UnitFormatTe
fail("Should not accept localized unit unless requested.");
} catch (ParserException e) {
final String message = e.getMessage();
- assertTrue(message, message.contains("mètre cube"));
+ assertTrue(message, message.contains("mètre"));
+ assertTrue(message, message.contains("cube"));
}
f.setLocale(Locale.FRANCE);
assertSame(Units.CUBIC_METRE, f.parse("mètre cube"));
@@ -395,6 +398,29 @@ public final strictfp class UnitFormatTe
}
/**
+ * Tests parsing a unit from another position than zero and verifies that
{@code UnitFormat} detects
+ * correctly where the unit symbol ends.
+ */
+ @Test
+ @DependsOnMethod("testParseSymbol")
+ public void testParsePosition() {
+ final UnitFormat f = new UnitFormat(Locale.UK);
+ final ParsePosition pos = new ParsePosition(4);
+ assertSame(Units.CENTIMETRE, f.parse("ABC cm DEF", pos));
+ assertEquals("ParsePosition.getIndex()", 6, pos.getIndex());
+ assertEquals("ParsePosition.getErrorIndex()", -1, pos.getErrorIndex());
+ /*
+ * Adding "cm DEF" as a unit label should allow UnitFormat to
recognize those characters.
+ * We associate a random unit to that label, just for testing purpose.
+ */
+ pos.setIndex(4);
+ f.label(Units.HECTARE, "cm DEF");
+ assertSame(Units.HECTARE, f.parse("ABC cm DEF", pos));
+ assertEquals("ParsePosition.getIndex()", 10, pos.getIndex());
+ assertEquals("ParsePosition.getErrorIndex()", -1, pos.getErrorIndex());
+ }
+
+ /**
* Tests {@link UnitFormat#clone()}.
*/
@Test