measu...

desruisseaux Thu, 02 Feb 2017 23:00:01 -0800

Author: desruisseaux
Date: Fri Feb  3 06:59:46 2017
New Revision: 1781499

URL: http://svn.apache.org/viewvc?rev=1781499&view=rev
Log:
Implement CoordinateFormat.parse(...) method.
This requires a fix in the way UnitFormat and DefaultFormat detect the end of 
the string that they have to parse.


Modified:
    
sis/branches/JDK8/core/sis-referencing/src/main/java/org/apache/sis/geometry/CoordinateFormat.java
    
sis/branches/JDK8/core/sis-referencing/src/test/java/org/apache/sis/geometry/CoordinateFormatTest.java
    
sis/branches/JDK8/core/sis-utility/src/main/java/org/apache/sis/io/DefaultFormat.java
    
sis/branches/JDK8/core/sis-utility/src/main/java/org/apache/sis/measure/AbstractUnit.java
    
sis/branches/JDK8/core/sis-utility/src/main/java/org/apache/sis/measure/UnitFormat.java
    
sis/branches/JDK8/core/sis-utility/src/test/java/org/apache/sis/internal/util/XPathsTest.java
    
sis/branches/JDK8/core/sis-utility/src/test/java/org/apache/sis/measure/UnitFormatTest.java

Modified: 
sis/branches/JDK8/core/sis-referencing/src/main/java/org/apache/sis/geometry/CoordinateFormat.java
URL: 
http://svn.apache.org/viewvc/sis/branches/JDK8/core/sis-referencing/src/main/java/org/apache/sis/geometry/CoordinateFormat.java?rev=1781499&r1=1781498&r2=1781499&view=diff
==============================================================================
--- 
sis/branches/JDK8/core/sis-referencing/src/main/java/org/apache/sis/geometry/CoordinateFormat.java
 [UTF-8] (original)
+++ 
sis/branches/JDK8/core/sis-referencing/src/main/java/org/apache/sis/geometry/CoordinateFormat.java
 [UTF-8] Fri Feb  3 06:59:46 2017
@@ -24,6 +24,7 @@ import java.text.DecimalFormat;
 import java.text.FieldPosition;
 import java.text.ParsePosition;
 import java.text.ParseException;
+import java.util.Arrays;
 import java.util.Date;
 import java.util.Locale;
 import java.util.TimeZone;
@@ -92,6 +93,19 @@ public class CoordinateFormat extends Co
     private static final long serialVersionUID = 8324486673169133932L;
 
     /**
+     * Maximal number of characters to convert to {@link String} if the text 
to parse is not a string instance.
+     * This is an arbitrary limit that may change (or be removed) in any 
future SIS version.
+     */
+    private static final int READ_AHEAD_LIMIT = 256;
+
+    /**
+     * Maximal number of dimensions to use when parsing a coordinate without 
{@link #defaultCRS}.
+     * This is an arbitrary limit that may change (or be removed) in any 
future SIS version.
+     * To avoid this limitation, users are encouraged to specify a default CRS.
+     */
+    private static final int DEFAULT_DIMENSION = 4;
+
+    /**
      * The separator between each coordinate values to be formatted.
      * The default value is a space.
      */
@@ -567,6 +581,8 @@ public class CoordinateFormat extends Co
     public DirectPosition parse(final CharSequence text, final ParsePosition 
pos) throws ParseException {
         ArgumentChecks.ensureNonNull("text", text);
         ArgumentChecks.ensureNonNull("pos",  pos);
+        final int start  = pos.getIndex();
+        final int length = text.length();
         /*
          * The NumberFormat, DateFormat and AngleFormat work only on String 
values, not on CharSequence.
          * If the given text is not a String, we will convert an arbitrarily 
small section of the given
@@ -580,9 +596,9 @@ public class CoordinateFormat extends Co
             subPos   = pos;
             asString = (String) text;
         } else {
-            offset   = pos.getIndex();
+            offset   = start;
             subPos   = new ParsePosition(0);
-            asString = text.subSequence(offset, Math.min(offset + 256, 
text.length())).toString();
+            asString = text.subSequence(start, Math.min(start + 
READ_AHEAD_LIMIT, length)).toString();
         }
         /*
          * The Format instances to be used for each ordinate values is 
determined by the default CRS.
@@ -591,9 +607,53 @@ public class CoordinateFormat extends Co
         if (lastCRS != defaultCRS) {
             initialize(defaultCRS);
         }
-        double[] ordinates = new double[formats.length];        // TODO: null 
if no CRS has been specified.
+        final double[] ordinates;
+        Format format;
+        final Format[] formats = this.formats;
+        if (formats != null) {
+            format    = null;
+            ordinates = new double[formats.length];
+        } else {
+            format    = getFormat(Number.class);
+            ordinates = new double[DEFAULT_DIMENSION];
+        }
+        /*
+         * For each ordinate value except the first one, we need to skip the 
separator.
+         * If we do not find the separator, we may consider that we reached 
the coordinate
+         * end ahead of time. We currently allow that only for coordinate 
without CRS.
+         */
         for (int i=0; i < ordinates.length; i++) {
-            final Object object = formats[i].parseObject(asString, subPos);
+            if (i != 0) {
+                final int end = subPos.getIndex();
+                int index = offset + end;
+                while (!CharSequences.regionMatches(text, index, separator)) {
+                    if (index < length) {
+                        final int c = Character.codePointAt(text, index);
+                        if (Character.isSpaceChar(c)) {
+                            index += Character.charCount(c);
+                            continue;
+                        }
+                    }
+                    if (formats == null) {
+                        pos.setIndex(index);
+                        return new 
GeneralDirectPosition(Arrays.copyOf(ordinates, i));
+                    }
+                    pos.setIndex(start);
+                    pos.setErrorIndex(index);
+                    throw new LocalizedParseException(getLocale(), 
Errors.Keys.UnexpectedCharactersAfter_2,
+                            new CharSequence[] {text.subSequence(start, end), 
CharSequences.token(text, index)}, index);
+                }
+                subPos.setIndex(index + separator.length() - offset);
+            }
+            /*
+             * At this point 'subPos' is set to the beginning of the next 
ordinate to parse in 'asString'.
+             * Parse the value as a number, angle or date, as determined from 
the coordinate system axis.
+             */
+            if (formats != null) {
+                format = formats[i];
+            }
+            @SuppressWarnings("null")
+            final Object object = format.parseObject(asString, subPos);
             if (object == null) {
                 /*
                  * If we failed to parse, build an error message with the type 
that was expected for that ordinate.
@@ -609,7 +669,7 @@ public class CoordinateFormat extends Co
                         case DATE:      type = Date.class;      break;
                     }
                 }
-                pos.setIndex(offset);
+                pos.setIndex(start);
                 if (subPos != pos) {
                     pos.setErrorIndex(offset + subPos.getErrorIndex());
                 }
@@ -656,7 +716,7 @@ public class CoordinateFormat extends Co
                             value = ((Unit<?>) 
unit).getConverterToAny(target).convert(value);
                         } catch (IncommensurableException e) {
                             index += offset;
-                            pos.setIndex(offset);
+                            pos.setIndex(start);
                             pos.setErrorIndex(index);
                             throw (ParseException) new 
ParseException(e.getMessage(), index).initCause(e);
                         }
@@ -674,20 +734,6 @@ public class CoordinateFormat extends Co
                 value = -value;
             }
             ordinates[i] = value;
-            /*
-             * We require the separator to be present before to continue.
-             */
-            final int index = offset + subPos.getIndex();
-            if (!CharSequences.regionMatches(text, index, separator)) {
-                if (i+1 == ordinates.length) {
-                    break;
-                }
-                pos.setIndex(offset);
-                pos.setErrorIndex(index);
-                throw new LocalizedParseException(getLocale(), 
Errors.Keys.UnexpectedCharactersAfter_2,
-                        new CharSequence[] {text.subSequence(offset, index), 
CharSequences.token(text, index)}, index);
-            }
-            subPos.setIndex(index + separator.length() - offset);
         }
         final GeneralDirectPosition position = new 
GeneralDirectPosition(ordinates);
         position.setCoordinateReferenceSystem(defaultCRS);

Modified: 
sis/branches/JDK8/core/sis-referencing/src/test/java/org/apache/sis/geometry/CoordinateFormatTest.java
URL: 
http://svn.apache.org/viewvc/sis/branches/JDK8/core/sis-referencing/src/test/java/org/apache/sis/geometry/CoordinateFormatTest.java?rev=1781499&r1=1781498&r2=1781499&view=diff
==============================================================================
--- 
sis/branches/JDK8/core/sis-referencing/src/test/java/org/apache/sis/geometry/CoordinateFormatTest.java
 [UTF-8] (original)
+++ 
sis/branches/JDK8/core/sis-referencing/src/test/java/org/apache/sis/geometry/CoordinateFormatTest.java
 [UTF-8] Fri Feb  3 06:59:46 2017
@@ -21,14 +21,15 @@ import java.util.Locale;
 import java.util.TimeZone;
 import java.text.ParsePosition;
 import java.text.ParseException;
+import org.opengis.geometry.DirectPosition;
 import org.apache.sis.measure.Angle;
 import org.apache.sis.referencing.crs.HardCodedCRS;
 import org.apache.sis.test.mock.VerticalCRSMock;
+import org.apache.sis.test.DependsOnMethod;
 import org.apache.sis.test.TestCase;
 import org.junit.Test;
 
 import static org.junit.Assert.*;
-import org.opengis.geometry.DirectPosition;
 
 
 /**
@@ -65,6 +66,40 @@ public final strictfp class CoordinateFo
     }
 
     /**
+     * Tests parsing a coordinate in unknown CRS.
+     * The ordinate values are formatted as ordinary numbers.
+     *
+     * @throws ParseException if the parsing failed.
+     */
+    @Test
+    public void testParseUnknownCRS() throws ParseException {
+        final CoordinateFormat format = new CoordinateFormat(null, null);
+        final ParsePosition index = new ParsePosition(0);
+        DirectPosition position = format.parse("23.78 -12.74 127.9 3.25", 
index);
+        assertArrayEquals(new double[] {23.78, -12.74, 127.9, 3.25}, 
position.getCoordinate(), STRICT);
+        assertEquals("ParsePosition.getErrorIndex()", -1, 
index.getErrorIndex());
+        assertEquals("ParsePosition.getIndex()",      23, index.getIndex());
+        /*
+         * Try another point having a different number of position
+         * for verifying that no cached values are causing problem.
+         */
+        index.setIndex(0);
+        position = format.parse("4.64 10.25 -3.12", index);
+        assertArrayEquals(new double[] {4.64, 10.25, -3.12}, 
position.getCoordinate(), STRICT);
+        assertEquals("ParsePosition.getErrorIndex()", -1, 
index.getErrorIndex());
+        assertEquals("ParsePosition.getIndex()",      16, index.getIndex());
+        /*
+         * Try again with a different separator.
+         */
+        format.setSeparator("; ");
+        index.setIndex(0);
+        position = format.parse("4.64; 10.25; -3.12", index);
+        assertArrayEquals(new double[] {4.64, 10.25, -3.12}, 
position.getCoordinate(), STRICT);
+        assertEquals("ParsePosition.getErrorIndex()", -1, 
index.getErrorIndex());
+        assertEquals("ParsePosition.getIndex()",      18, index.getIndex());
+    }
+
+    /**
      * Tests formatting a single vertical coordinate.
      */
     @Test
@@ -85,6 +120,7 @@ public final strictfp class CoordinateFo
      * Tests formatting a 4-dimensional geographic coordinate.
      */
     @Test
+    @DependsOnMethod("testFormatUnknownCRS")
     public void testFormatGeographic4D() {
         /*
          * For a 4-dimensional coordinate with a temporal CRS.
@@ -124,16 +160,30 @@ public final strictfp class CoordinateFo
      *
      * @throws ParseException if the parsing failed.
      */
-//  @Test
+    @Test
+    @DependsOnMethod("testParseUnknownCRS")
     public void testParseGeographic4D() throws ParseException {
         final CoordinateFormat format = new CoordinateFormat(Locale.FRANCE, 
TimeZone.getTimeZone("GMT+01:00"));
-        final String anglePattern = "DD°MM.m′";
-        final String  datePattern = "dd-MM-yyyy HH:mm";
-        final ParsePosition index = new ParsePosition(0);
-        format.applyPattern(Angle.class,  anglePattern);
-        format.applyPattern(Date.class,    datePattern);
+        format.applyPattern(Date.class, "dd-MM-yyyy HH:mm");
         format.setDefaultCRS(HardCodedCRS.GEOID_4D);
-        final DirectPosition pos = format.parse("23°46,8′E 12°44,4′S 127,9 m 
22-09-2006 07:00", index);
-        assertArrayEquals(new double[] {23.78, -12.74, 127.90, 54000.25}, 
pos.getCoordinate(), 0.005);
+        final ParsePosition index = new ParsePosition(11);
+        final DirectPosition pos = format.parse("(to skip); 23°46,8′E 
12°44,4′S 127,9 m 22-09-2006 07:00 (ignore)", index);
+        assertArrayEquals(new double[] {23.78, -12.74, 127.90, 54000.25}, 
pos.getCoordinate(), STRICT);
+        assertEquals("ParsePosition.getErrorIndex()", -1, 
index.getErrorIndex());
+        assertEquals("ParsePosition.getIndex()",      55, index.getIndex());
+        /*
+         * Tests error message when parsing the same string but with unknown 
units of measurement.
+         */
+        index.setIndex(11);
+        try {
+            format.parse("(to skip); 23°46,8′E 12°44,4′S 127,9 Foo 22-09-2006 
07:00", index);
+            fail("Should not have parsed a coordinate with unknown units.");
+        } catch (ParseException e) {
+            assertEquals("ParsePosition.getIndex()",        11, 
index.getIndex());
+            assertEquals("ParsePosition.getErrorIndex()",   37, 
index.getErrorIndex());
+            assertEquals("ParseException.getErrorOffset()", 37, 
e.getErrorOffset());
+            assertEquals("Les caractères « Foo » après « 23°46,8′E 12°44,4′S 
127,9 » sont inattendus.",
+                         e.getLocalizedMessage());  // In the language 
specified at CoordinateFormat construction time.
+        }
     }
 }

Modified: 
sis/branches/JDK8/core/sis-utility/src/main/java/org/apache/sis/io/DefaultFormat.java
URL: 
http://svn.apache.org/viewvc/sis/branches/JDK8/core/sis-utility/src/main/java/org/apache/sis/io/DefaultFormat.java?rev=1781499&r1=1781498&r2=1781499&view=diff
==============================================================================
--- 
sis/branches/JDK8/core/sis-utility/src/main/java/org/apache/sis/io/DefaultFormat.java
 [UTF-8] (original)
+++ 
sis/branches/JDK8/core/sis-utility/src/main/java/org/apache/sis/io/DefaultFormat.java
 [UTF-8] Fri Feb  3 06:59:46 2017
@@ -39,7 +39,7 @@ import org.apache.sis.internal.util.Loca
  *
  * @author  Martin Desruisseaux (Geomatys)
  * @since   0.3
- * @version 0.3
+ * @version 0.8
  * @module
  */
 @SuppressWarnings("CloneableClassWithoutClone")   // Because this class does 
not contain field that need to be cloned.
@@ -125,15 +125,34 @@ final class DefaultFormat extends Format
      */
     @Override
     public Object parseObject(String source, final ParsePosition pos) {
-        final int length = source.length();
-        final int index = CharSequences.skipLeadingWhitespaces(source, 
pos.getIndex(), length);
-        source = source.substring(index, 
CharSequences.skipTrailingWhitespaces(source, index, length));
+        final int index = CharSequences.skipLeadingWhitespaces(source, 
pos.getIndex(), source.length());
+        int end;
+        for (end = index; end < source.length(); end++) {
+            final char c = source.charAt(end);
+            switch (c) {
+                default: {
+                    if (c >= '+' && c <= '9') continue;
+                    break;
+                    /*
+                     * ASCII characters in above range are +,-./0123456789
+                     * But the , and / characters are excluded by the case 
below.
+                     */
+                }
+                case ',': case '/': break;
+                case 'E': case 'e': continue;
+            }
+            break;
+        }
+        source = source.substring(index, end);
+        final Object value;
         try {
-            return valueOf(source);
+            value = valueOf(source);
         } catch (NumberFormatException cause) {
             pos.setErrorIndex(index);
             return null;
         }
+        pos.setIndex(end);
+        return value;
     }
 
     /**

Modified: 
sis/branches/JDK8/core/sis-utility/src/main/java/org/apache/sis/measure/AbstractUnit.java
URL: 
http://svn.apache.org/viewvc/sis/branches/JDK8/core/sis-utility/src/main/java/org/apache/sis/measure/AbstractUnit.java?rev=1781499&r1=1781498&r2=1781499&view=diff
==============================================================================
--- 
sis/branches/JDK8/core/sis-utility/src/main/java/org/apache/sis/measure/AbstractUnit.java
 [UTF-8] (original)
+++ 
sis/branches/JDK8/core/sis-utility/src/main/java/org/apache/sis/measure/AbstractUnit.java
 [UTF-8] Fri Feb  3 06:59:46 2017
@@ -308,8 +308,11 @@ abstract class AbstractUnit<Q extends Qu
     /**
      * Returns {@code true} if the given Unicode code point is a valid 
character for a unit symbol.
      * Current implementation accepts letters, subscripts and the degree sign, 
but the set of legal
-     * characters may be expanded in any future SIS version. The most 
important goal is to avoid
-     * confusion with exponents and to detect where a unit symbol ends.
+     * characters may be expanded in any future SIS version (however it should 
never allow spaces).
+     * The goal is to avoid confusion with exponents and to detect where a 
unit symbol ends.
+     *
+     * <p>Space characters must be excluded from the set of legal characters 
because allowing them
+     * would make harder for {@link UnitFormat} to detect correctly where a 
unit symbol ends.</p>
      *
      * <p>Note that some units defined in the {@link Units} class break this 
rule. In particular,
      * some of those units contains superscripts or division sign. But the 
hard-coded symbols in

Modified: 
sis/branches/JDK8/core/sis-utility/src/main/java/org/apache/sis/measure/UnitFormat.java
URL: 
http://svn.apache.org/viewvc/sis/branches/JDK8/core/sis-utility/src/main/java/org/apache/sis/measure/UnitFormat.java?rev=1781499&r1=1781498&r2=1781499&view=diff
==============================================================================
--- 
sis/branches/JDK8/core/sis-utility/src/main/java/org/apache/sis/measure/UnitFormat.java
 [UTF-8] (original)
+++ 
sis/branches/JDK8/core/sis-utility/src/main/java/org/apache/sis/measure/UnitFormat.java
 [UTF-8] Fri Feb  3 06:59:46 2017
@@ -282,7 +282,7 @@ public class UnitFormat extends Format i
      * Mapping from long localized and unlocalized names to unit instances.
      * This map is used only for parsing and created when first needed.
      *
-     * @see #nameToUnit()
+     * @see #fromName(String)
      */
     private transient volatile Map<String,Unit<?>> nameToUnit;
 
@@ -291,7 +291,7 @@ public class UnitFormat extends Format i
      * if the user create many {@code UnitFormat} instances. Note that we do 
not cache {@link #symbolToName} because
      * {@link ResourceBundle} already provides its own caching mechanism.
      *
-     * @see #nameToUnit()
+     * @see #fromName(String)
      */
     private static final WeakValueHashMap<Locale, Map<String,Unit<?>>> SHARED 
= new WeakValueHashMap<>(Locale.class);
 
@@ -384,7 +384,8 @@ public class UnitFormat extends Format i
      * <div class="section">Restriction on character set</div>
      * Current implementation accepts only {@linkplain Character#isLetter(int) 
letters},
      * {@linkplain Characters#isSubScript(int) subscripts}, {@linkplain 
Character#isSpaceChar(int) spaces}
-     * (including non-breaking spaces but <strong>not</strong> CR/LF 
characters) and the degree sign (°),
+     * (including non-breaking spaces but <strong>not</strong> CR/LF 
characters), the degree sign (°) and
+     * a few other characters like underscore,
      * but the set of legal characters may be expanded in future Apache SIS 
versions.
      * However the following restrictions are likely to remain:
      *
@@ -405,7 +406,7 @@ public class UnitFormat extends Format i
         ArgumentChecks.ensureNonEmpty("label", label);
         for (int i=0; i < label.length();) {
             final int c = label.codePointAt(i);
-            if (!AbstractUnit.isSymbolChar(c) && !Character.isSpaceChar(c)) {
+            if (!AbstractUnit.isSymbolChar(c) && !Character.isSpaceChar(c)) {  
     // NOT Character.isWhitespace(int)
                 throw new 
IllegalArgumentException(Errors.format(Errors.Keys.IllegalArgumentValue_2, 
"label", label));
             }
             i += Character.charCount(c);
@@ -444,16 +445,36 @@ public class UnitFormat extends Format i
     }
 
     /**
-     * Returns the mapping from long localized and unlocalized names to unit 
instances.
-     * This mapping is somewhat the converse of {@link #symbolToName()}, but 
includes
+     * Returns the unit instance for the given long (un)localized name.
+     * This method is somewhat the converse of {@link #symbolToName()}, but 
recognizes also
      * international and American spelling of unit names in addition of 
localized names.
      * The intend is to recognize "meter" as well as "metre".
      *
      * <p>While we said that {@code UnitFormat} is not thread safe, we make an 
exception for this method
      * for allowing the singleton {@link #INSTANCE} to parse symbols in a 
multi-threads environment.</p>
      */
-    @SuppressWarnings("ReturnOfCollectionOrArrayField")
-    private Map<String,Unit<?>> nameToUnit() {
+    @SuppressWarnings("fallthrough")
+    private Unit<?> fromName(String uom) {
+        /*
+         * Before to search in resource bundles, check for degrees units. The 
"deg" unit can be both angular
+         * and Celsius degrees. We try to resolve this ambiguity by looking 
for the "C" suffix. We perform a
+         * special case for the degrees units because SI symbols are 
case-sensitive and unit names in resource
+         * bundles are case-insensitive, but the "deg" case is a mix of both.
+         */
+        if (uom.regionMatches(true, 0, "deg", 0, 3)) {
+            final int length = uom.length();
+            switch (length) {
+                case 3: return Units.DEGREE;                    // Exactly 
"deg"  (ignoring case)
+                case 5: final char c = uom.charAt(3);
+                        if (c != '_' && !Character.isSpaceChar(c)) break;
+                        // else fallthrough
+                case 4: switch (uom.charAt(length - 1)) {
+                            case 'K':                           // Unicode 
U+212A
+                            case 'K': return Units.KELVIN;      // Exactly 
"degK" (ignoring case except for 'K')
+                            case 'C': return Units.CELSIUS;
+                        }
+            }
+        }
         Map<String,Unit<?>> map = nameToUnit;
         if (map == null) {
             map = SHARED.get(locale);
@@ -489,7 +510,17 @@ public class UnitFormat extends Format i
             }
             nameToUnit = map;
         }
-        return map;
+        /*
+         * The 'nameToUnit' map contains plural forms (declared in 
UnitAliases.properties),
+         * but we make a special case for "degrees", "metres" and "meters" 
because they
+         * appear in numerous places.
+         */
+        uom = uom.replace('_', ' ').toLowerCase(locale);
+        uom = 
CharSequences.replace(CharSequences.replace(CharSequences.replace(CharSequences.toASCII(uom),
+                "meters",  "meter"),
+                "metres",  "metre"),
+                "degrees", "degree").toString();
+        return map.get(uom);
     }
 
     /**
@@ -775,18 +806,33 @@ public class UnitFormat extends Format i
      * Returns {@code true} if the given character is a digit in the sense of 
the {@code UnitFormat} parser.
      * Note that "digit" is taken here in a much more restrictive way than 
{@link Character#isDigit(int)}.
      */
-    private static boolean isDigit(final int c) {
+    private static boolean isDigit(final char c) {
         return c >= '0' && c <= '9';
     }
 
     /**
      * Returns {@code true} if the given character is the sign of a number 
according the {@code UnitFormat} parser.
      */
-    private static boolean isSign(final int c) {
+    private static boolean isSign(final char c) {
         return c == '+' || c == '-';
     }
 
     /**
+     * Returns {@code true} if the given character sequence contains at least 
one digit.
+     * This is a hack for allowing to recognize units like "100 feet" (in 
principle not
+     * legal, but seen in practice). This verification has some value if 
digits are not
+     * allowed as unit label or symbol.
+     */
+    private static boolean hasDigit(final CharSequence symbol, int lower, 
final int upper) {
+        while (lower < upper) {
+            if (isDigit(symbol.charAt(lower++))) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    /**
      * Parses the given text as an instance of {@code Unit}.
      * If the parse completes without reading the entire length of the text, 
an exception is thrown.
      *
@@ -810,7 +856,7 @@ public class UnitFormat extends Format i
         final ParsePosition position = new ParsePosition(0);
         final Unit<?> unit = parse(symbols, position);
         final int length = symbols.length();
-        final int unrecognized = 
CharSequences.skipTrailingWhitespaces(symbols, position.getIndex(), length);
+        final int unrecognized = CharSequences.skipLeadingWhitespaces(symbols, 
position.getIndex(), length);
         if (unrecognized < length) {
             throw new 
ParserException(Errors.format(Errors.Keys.UnexpectedCharactersAfter_2,
                     CharSequences.trimWhitespaces(symbols, 0, unrecognized),
@@ -845,25 +891,27 @@ public class UnitFormat extends Format i
         ArgumentChecks.ensureNonNull("position", position);
         /*
          * Check for authority codes (currently only EPSG, but more could be 
added later).
-         * If the unit is not an authority code (which is the most common 
case), then we
-         * will check for hard-coded unit symbols.
-         *
-         * DefinitionURI.codeOf(…) returns 'uom' directly (provided that 
whitespaces were already trimmed)
-         * if no ':' character were found, in which case the string is assumed 
to be the code directly.
-         * This is the intended behavior for AuthorityFactory, but in the 
particular case of this method
-         * we want to try to parse as a xpointer before to give up.
+         * Example: "urn:ogc:def:uom:EPSG::9001". If the unit is not an 
authority code
+         * (which is the most common case), only then we will parse the unit 
symbols.
          */
-        int start = CharSequences.skipLeadingWhitespaces(symbols, 
position.getIndex(), symbols.length());
-        int end = XPaths.endOfURI(symbols, start);
-        if (end >= 0) {
-            final String uom = symbols.subSequence(start, end).toString();
+        int end   = symbols.length();
+        int start = CharSequences.skipLeadingWhitespaces(symbols, 
position.getIndex(), end);
+        int endOfURI = XPaths.endOfURI(symbols, start);
+        if (endOfURI >= 0) {
+            final String uom = symbols.subSequence(start, endOfURI).toString();
             String code = DefinitionURI.codeOf("uom", Constants.EPSG, uom);
-            if (code != null && code != uom) {                  // Really 
identity check, see above comment.
+            /*
+             * DefinitionURI.codeOf(…) returns 'uom' directly (provided that 
whitespaces were already trimmed)
+             * if no ':' character were found, in which case the string is 
assumed to be the code directly.
+             * This is the intended behavior for AuthorityFactory, but in the 
particular case of this method
+             * we want to try to parse as a xpointer before to give up.
+             */
+            if (code != null && code != uom) {
                 NumberFormatException failure = null;
                 try {
                     final Unit<?> unit = 
Units.valueOfEPSG(Integer.parseInt(code));
                     if (unit != null) {
-                        position.setIndex(end);
+                        position.setIndex(endOfURI);
                         return unit;
                     }
                 } catch (NumberFormatException e) {
@@ -871,12 +919,28 @@ public class UnitFormat extends Format i
                 }
                 throw (ParserException) new 
ParserException(Errors.format(Errors.Keys.UnknownUnit_1,
                         Constants.EPSG + DefaultNameSpace.DEFAULT_SEPARATOR + 
code),
-                        symbols, start + Math.max(0, 
uom.indexOf(code))).initCause(failure);
+                        symbols, start + Math.max(0, 
uom.lastIndexOf(code))).initCause(failure);
             }
+            /*
+             * Not an EPSG code. Maybe it is a URI like this example:
+             * 
http://schemas.opengis.net/iso/19139/20070417/resources/uom/gmxUom.xml#xpointer(//*[@gml:id='m'])
+             *
+             * If we find such 'uom' value, we could replace 'symbols' by that 
'uom'. But it would cause a wrong
+             * error index to be reported in case of parsing failure. We will 
rather try to adjust the indices
+             * (and replace 'symbols' only in last resort).
+             */
             code = XPaths.xpointer("uom", uom);
             if (code != null) {
-                symbols = code;
-                start = 0;
+                final int base = start;
+                start = endOfURI - code.length();
+                do if (--start < base) {          // Should never happen (see 
above comment), but we are paranoiac.
+                    symbols = code;
+                    start = 0;
+                    break;
+                } while (!CharSequences.regionMatches(symbols, start, code));
+                end = start + code.length();
+            } else {
+                endOfURI = -1;
             }
         }
         /*
@@ -887,7 +951,7 @@ public class UnitFormat extends Format i
          */
         int operation = NOOP;            // Enumeration value: IMPLICIT, 
MULTIPLY, DIVIDE.
         Unit<?> unit = null;
-        end = symbols.length();
+        boolean hasSpaces = false;
         int i = start;
 scan:   for (int n; i < end; i += n) {
             final int c = Character.codePointAt(symbols, i);
@@ -896,7 +960,7 @@ scan:   for (int n; i < end; i += n) {
             switch (c) {
                 /*
                  * For any character that are is not an operator or 
parenthesis, either continue the scanning of
-                 * character or stop it, depending on whether the character is 
valid for a unit symbol or not.
+                 * characters or stop it, depending on whether the character 
is valid for a unit symbol or not.
                  * In the later case, we consider that we reached the end of a 
unit symbol.
                  */
                 default:  {
@@ -906,7 +970,11 @@ scan:   for (int n; i < end; i += n) {
                         }
                         continue;
                     }
-                    if (Character.isSpaceChar(c) || Character.isDigit(c) || 
Characters.isSuperScript(c)) {
+                    if (Character.isDigit(c) || Characters.isSuperScript(c)) {
+                        continue;
+                    }
+                    if (Character.isSpaceChar(c)) {                         // 
NOT Character.isWhitespace(int)
+                        hasSpaces = true;
                         continue;
                     }
                     break scan;
@@ -972,15 +1040,54 @@ scan:   for (int n; i < end; i += n) {
             if (operation != IMPLICIT) {
                 unit = apply(operation, unit, parseSymbol(symbols, start, i));
             }
+            hasSpaces = false;
             operation = next;
             start = i + n;
         }
         /*
-         * At this point we either found an unrecognized character or reached 
the end of string. Parse the
-         * remaining characters as a unit and apply the pending unit operation 
(multiplication or division).
+         * At this point we either found an unrecognized character or reached 
the end of string. We will
+         * parse the remaining characters as a unit and apply the pending unit 
operation (multiplication
+         * or division). But before, we need to check if the parsing should 
stop at the first whitespace.
+         * This verification assumes that spaces are allowed only in labels 
specified by the label(…)
+         * method and in resource bundles, not in labels specified by 
AbstractUnit.alternate(String).
          */
-        unit = apply(operation, unit, parseSymbol(symbols, start, i));
-        position.setIndex(i);
+        Unit<?> component = null;
+        if (hasSpaces) {
+            end = i;
+            start = CharSequences.skipLeadingWhitespaces(symbols, start, i);
+search:     while ((i = CharSequences.skipTrailingWhitespaces(symbols, start, 
i)) > start) {
+                final String uom = symbols.subSequence(start, i).toString();
+                if ((component = labelToUnit.get(uom)) != null) break;
+                if ((component =        fromName(uom)) != null) break;
+                int j=i, c;
+                do {
+                    c = Character.codePointBefore(symbols, j);
+                    j -= Character.charCount(c);
+                    if (j <= start) break search;
+                } while (!Character.isWhitespace(c));
+                /*
+                 * Really use Character.isWhitespace(c) above, not 
Character.isSpaceChar(c), because we want
+                 * to exclude non-breaking spaces.   This block should be the 
only place in UnitFormat class
+                 * where we use isWhitespace(c) instead of isSpaceChar(c).
+                 */
+                i = j;                  // Will become the index of first 
space after search loop completion.
+            }
+            /*
+             * At this point we did not find any user-specified label or 
localized name matching the substring.
+             * Assume that the parsing should stop at the first space, on the 
basis that spaces are not allowed
+             * in unit symbols. We make an exception if we detect that the 
part before the first space contains
+             * digits (not allowed in unit symbols either), in which case the 
substring may be something like
+             * "100 feet".
+             */
+            if (hasDigit(symbols, start, i)) {
+                i = end;                        // Restore the full length 
(until the first illegal character).
+            }
+        }
+        if (component == null) {
+            component = parseSymbol(symbols, start, i);
+        }
+        unit = apply(operation, unit, component);
+        position.setIndex(endOfURI >= 0 ? endOfURI : i);
         return unit;
     }
 
@@ -1017,7 +1124,6 @@ scan:   for (int n; i < end; i += n) {
      * @return the parsed unit symbol (never {@code null}).
      * @throws ParserException if a problem occurred while parsing the given 
symbols.
      */
-    @SuppressWarnings("fallthrough")
     private Unit<?> parseSymbol(final CharSequence symbols, final int lower, 
final int upper) throws ParserException {
         final String uom = CharSequences.trimWhitespaces(symbols, lower, 
upper).toString();
         /*
@@ -1109,38 +1215,10 @@ scan:   for (int n; i < end; i += n) {
                     }
                 }
                 /*
-                 * Check for degrees units. Note that "deg" could be both 
angular and Celsius degrees.
-                 * We try to resolve this ambiguity in the code below by 
looking for the "C" suffix.
-                 * We perform a special case for those checks because the 
above check for unit symbol
-                 * is case-sentive, the check for unit name (later) is 
case-insensitive, while this
-                 * check for "deg" is a mix of both.
-                 */
-                if (uom.regionMatches(true, 0, "deg", 0, 3)) {
-                    switch (length) {
-                        case 3: return Units.DEGREE;                    // 
Exactly "deg"  (ignoring case)
-                        case 5: final char c = uom.charAt(3);
-                                if (c != '_' && !Character.isSpaceChar(c)) 
break;
-                                // else fallthrough
-                        case 4: switch (uom.charAt(length - 1)) {
-                                    case 'K':                           // 
Unicode U+212A
-                                    case 'K': return Units.KELVIN;      // 
Exactly "degK" (ignoring case except for 'K')
-                                    case 'C': return Units.CELSIUS;
-                                }
-                    }
-                }
-                /*
                  * At this point, we have determined that the label is not a 
known unit symbol.
                  * It may be a unit name, in which case the label is not 
case-sensitive anymore.
-                 * The 'nameToUnit' map contains plural forms (declared in 
UnitAliases.properties),
-                 * but we make a special case for "degrees", "metres" and 
"meters" because they
-                 * appear in numerous places.
                  */
-                String lc = uom.replace('_', ' ').toLowerCase(locale);
-                lc = 
CharSequences.replace(CharSequences.replace(CharSequences.replace(CharSequences.toASCII(lc),
-                        "meters",  "meter"),
-                        "metres",  "metre"),
-                        "degrees", "degree").toString();
-                unit = nameToUnit().get(lc);
+                unit = fromName(uom);
                 if (unit == null) {
                     if (CharSequences.regionMatches(symbols, lower, UNITY, 
true)) {
                         return Units.UNITY;

Modified: 
sis/branches/JDK8/core/sis-utility/src/test/java/org/apache/sis/internal/util/XPathsTest.java
URL: 
http://svn.apache.org/viewvc/sis/branches/JDK8/core/sis-utility/src/test/java/org/apache/sis/internal/util/XPathsTest.java?rev=1781499&r1=1781498&r2=1781499&view=diff
==============================================================================
--- 
sis/branches/JDK8/core/sis-utility/src/test/java/org/apache/sis/internal/util/XPathsTest.java
 [UTF-8] (original)
+++ 
sis/branches/JDK8/core/sis-utility/src/test/java/org/apache/sis/internal/util/XPathsTest.java
 [UTF-8] Fri Feb  3 06:59:46 2017
@@ -16,6 +16,7 @@
  */
 package org.apache.sis.internal.util;
 
+import org.apache.sis.util.Characters;
 import org.apache.sis.test.TestCase;
 import org.junit.Test;
 
@@ -42,6 +43,7 @@ public final strictfp class XPathsTest e
         assertEquals(97, 
XPaths.endOfURI("http://schemas.opengis.net/iso/19139/20070417/resources/uom/gmxUom.xml#xpointer(//*[@gml:id='m'])",
 0));
         assertEquals(-1, XPaths.endOfURI("m/s", 0));
         assertEquals(-1, XPaths.endOfURI("m.s", 0));
+        assertEquals(11, XPaths.endOfURI("EPSG" + Characters.NO_BREAK_SPACE + 
": 9001", 0));
     }
 
     /**

Modified: 
sis/branches/JDK8/core/sis-utility/src/test/java/org/apache/sis/measure/UnitFormatTest.java
URL: 
http://svn.apache.org/viewvc/sis/branches/JDK8/core/sis-utility/src/test/java/org/apache/sis/measure/UnitFormatTest.java?rev=1781499&r1=1781498&r2=1781499&view=diff
==============================================================================
--- 
sis/branches/JDK8/core/sis-utility/src/test/java/org/apache/sis/measure/UnitFormatTest.java
 [UTF-8] (original)
+++ 
sis/branches/JDK8/core/sis-utility/src/test/java/org/apache/sis/measure/UnitFormatTest.java
 [UTF-8] Fri Feb  3 06:59:46 2017
@@ -19,6 +19,7 @@ package org.apache.sis.measure;
 import java.util.Set;
 import java.util.HashSet;
 import java.util.Locale;
+import java.text.ParsePosition;
 import java.lang.reflect.Field;
 import javax.measure.Unit;
 import javax.measure.format.ParserException;
@@ -281,6 +282,7 @@ public final strictfp class UnitFormatTe
             fail("Should not accept unknown unit.");
         } catch (ParserException e) {
             final String message = e.getMessage();
+            assertTrue(message, message.contains("degree"));
             assertTrue(message, message.contains("foo"));
         }
         // Tests with localisation.
@@ -289,7 +291,8 @@ public final strictfp class UnitFormatTe
             fail("Should not accept localized unit unless requested.");
         } catch (ParserException e) {
             final String message = e.getMessage();
-            assertTrue(message, message.contains("mètre cube"));
+            assertTrue(message, message.contains("mètre"));
+            assertTrue(message, message.contains("cube"));
         }
         f.setLocale(Locale.FRANCE);
         assertSame(Units.CUBIC_METRE, f.parse("mètre cube"));
@@ -395,6 +398,29 @@ public final strictfp class UnitFormatTe
     }
 
     /**
+     * Tests parsing a unit from another position than zero and verifies that 
{@code UnitFormat} detects
+     * correctly where the unit symbol ends.
+     */
+    @Test
+    @DependsOnMethod("testParseSymbol")
+    public void testParsePosition() {
+        final UnitFormat f = new UnitFormat(Locale.UK);
+        final ParsePosition pos = new ParsePosition(4);
+        assertSame(Units.CENTIMETRE, f.parse("ABC cm DEF", pos));
+        assertEquals("ParsePosition.getIndex()", 6, pos.getIndex());
+        assertEquals("ParsePosition.getErrorIndex()", -1, pos.getErrorIndex());
+        /*
+         * Adding "cm DEF" as a unit label should allow UnitFormat to 
recognize those characters.
+         * We associate a random unit to that label, just for testing purpose.
+         */
+        pos.setIndex(4);
+        f.label(Units.HECTARE, "cm DEF");
+        assertSame(Units.HECTARE, f.parse("ABC cm DEF", pos));
+        assertEquals("ParsePosition.getIndex()", 10, pos.getIndex());
+        assertEquals("ParsePosition.getErrorIndex()", -1, pos.getErrorIndex());
+    }
+
+    /**
      * Tests {@link UnitFormat#clone()}.
      */
     @Test

svn commit: r1781499 - in /sis/branches/JDK8/core: sis-referencing/src/main/java/org/apache/sis/geometry/ sis-referencing/src/test/java/org/apache/sis/geometry/ sis-utility/src/main/java/org/apache/sis/io/ sis-utility/src/main/java/org/apache/sis/measu...

Reply via email to