This is an automated email from the ASF dual-hosted git repository.
desruisseaux pushed a commit to branch geoapi-4.0
in repository https://gitbox.apache.org/repos/asf/sis.git
The following commit(s) were added to refs/heads/geoapi-4.0 by this push:
new 2537d21 Allow parsing and formatting of custom label raised to some
power, for example "yd2" for square yard.
2537d21 is described below
commit 2537d215cd216d2c5be029849df228fc187937fe
Author: Martin Desruisseaux <[email protected]>
AuthorDate: Tue Sep 4 11:40:39 2018 +0200
Allow parsing and formatting of custom label raised to some power, for
example "yd2" for square yard.
---
.../org/apache/sis/measure/ConventionalUnit.java | 113 +++++++++++++++++----
.../java/org/apache/sis/measure/UnitFormat.java | 68 +++++++++----
.../main/java/org/apache/sis/util/Characters.java | 16 ++-
.../java/org/apache/sis/util/resources/Errors.java | 2 +-
.../org/apache/sis/measure/UnitFormatTest.java | 58 ++++++++++-
5 files changed, 207 insertions(+), 50 deletions(-)
diff --git
a/core/sis-utility/src/main/java/org/apache/sis/measure/ConventionalUnit.java
b/core/sis-utility/src/main/java/org/apache/sis/measure/ConventionalUnit.java
index 68678a8..ef66a06 100644
---
a/core/sis-utility/src/main/java/org/apache/sis/measure/ConventionalUnit.java
+++
b/core/sis-utility/src/main/java/org/apache/sis/measure/ConventionalUnit.java
@@ -155,6 +155,45 @@ final class ConventionalUnit<Q extends Quantity<Q>>
extends AbstractUnit<Q> {
}
/**
+ * Raises the given symbol to the given power. If the given symbol already
contains an exponent,
+ * it will be combined with the given power.
+ *
+ * @param symbol the symbol to raise to a power.
+ * @param n the power to which to raise the given symbol.
+ * @param root {@code true} for raising to 1/n instead of n.
+ */
+ private static String pow(final String symbol, final int n, final boolean
root) {
+ if (symbol != null) {
+ final int length = symbol.length();
+ int power = 1, i = 0;
+ while (i < length) {
+ final int c = symbol.codePointAt(i);
+ i += Character.charCount(c);
+ if (!isSymbolChar(c)) {
+ if (!Characters.isSuperScript(c) || i +
Character.charCount(c) < length) {
+ return null; // Character is not an
exponent or is not the last character.
+ }
+ power = Characters.toNormalScript(c) - '0';
+ }
+ }
+ if (power >= 0 && power <= 9) {
+ final boolean isValid;
+ if (root) {
+ isValid = (power % n) == 0;
+ power /= n;
+ } else {
+ power *= n;
+ isValid = (power >= 0 && power <= 9);
+ }
+ if (isValid) {
+ return symbol.substring(0, i) +
Characters.toSuperScript((char) (power + '0'));
+ }
+ }
+ }
+ return null;
+ }
+
+ /**
* Returns the positive power after the given unit symbol, or 0 in case of
doubt.
* For example this method returns 1 for “m” and 2 for “m²”. We parse the
unit symbol instead
* than the {@link SystemUnit#dimension} because we can not extract easily
the power from the
@@ -180,26 +219,22 @@ final class ConventionalUnit<Q extends Quantity<Q>>
extends AbstractUnit<Q> {
* operator like the “/” in “m/s²”. In any cases we stop here because
we want the
* exponent of the first symbol, not the “²” in “m/s²”.
*/
- if (Character.isBmpCodePoint(c)) {
- final int p = Characters.toNormalScript((char) c) - '0';
- if (p >= 0 && p <= 9) {
- if (i < length) {
- c = symbol.codePointAt(i);
- if (isSymbolChar(c)) {
- // Exponent is immediately followed by a another unit
symbol character.
- // We would have expected something else, like an
arithmetic operator.
- return 0;
- }
- if (Character.isBmpCodePoint(c)) {
- c = Characters.toNormalScript((char) c);
- if (c >= '0' && c <= '9') {
- // Exponent on two digits. We do not expect so
high power after unit symbol.
- return 0;
- }
- }
+ final int p = Characters.toNormalScript(c) - '0';
+ if (p >= 0 && p <= 9) {
+ if (i < length) {
+ c = symbol.codePointAt(i);
+ if (isSymbolChar(c)) {
+ // Exponent is immediately followed by another unit
symbol character.
+ // We would have expected something else, like an
arithmetic operator.
+ return 0;
+ }
+ c = Characters.toNormalScript(c);
+ if (c >= '0' && c <= '9') {
+ // Exponent on two digits. We do not expect so high power
after unit symbol.
+ return 0;
}
- return p;
}
+ return p;
}
return 1;
}
@@ -321,6 +356,18 @@ final class ConventionalUnit<Q extends Quantity<Q>>
extends AbstractUnit<Q> {
}
/**
+ * Returns a new unit identical to this unit except for the symbol, which
is set to the given value.
+ * This is used by {@link UnitFormat} only; we do not provide public API
for setting a unit symbol
+ * on a conventional unit.
+ */
+ final ConventionalUnit<Q> forSymbol(final String symbol) {
+ if (symbol.equals(getSymbol())) {
+ return this;
+ }
+ return new ConventionalUnit<>(target, toTarget, symbol, scope, epsg);
+ }
+
+ /**
* Unsupported operation for conventional units, as required by JSR-363
specification.
*
* @param symbol the new symbol for the alternate unit.
@@ -351,6 +398,7 @@ final class ConventionalUnit<Q extends Quantity<Q>> extends
AbstractUnit<Q> {
*/
@Override
public Unit<?> multiply(final Unit<?> multiplier) {
+ if (multiplier == this) return pow(2); // For
formatting e.g. "mi²".
ensureRatioScale();
return target.multiply(multiplier).transform(toTarget);
}
@@ -376,8 +424,7 @@ final class ConventionalUnit<Q extends Quantity<Q>> extends
AbstractUnit<Q> {
@Override
public Unit<?> pow(final int n) {
ensureRatioScale();
- final Unit<?> result = target.pow(n);
- return (result == target) ? this :
result.transform(LinearConverter.pow(toTarget, n, false));
+ return applyConversion(target.pow(n), n, false);
}
/**
@@ -390,8 +437,30 @@ final class ConventionalUnit<Q extends Quantity<Q>>
extends AbstractUnit<Q> {
@Override
public Unit<?> root(final int n) {
ensureRatioScale();
- final Unit<?> result = target.root(n);
- return (result == target) ? this :
result.transform(LinearConverter.pow(toTarget, n, true));
+ return applyConversion(target.root(n), n, true);
+ }
+
+ /**
+ * Applies the {@link #toTarget} conversion factor on the result of
raising the system unit to the given power.
+ * This method shall be invoked only if {@link #ensureRatioScale()}
succeeded (this is not verified).
+ * This method tries to build a unit symbol made from the current unit
raised to the given power.
+ * This is not needed for SI units since {@link #create(AbstractUnit,
UnitConverter)} can infer
+ * the symbol automatically (including its prefix), but this is useful for
non-SI units like "mi²".
+ *
+ * @param result the result of {@link SystemUnit#pow(int)} or {@link
SystemUnit#root(int)}.
+ * @param n the power by which the {@link #target} has been raised
for producing {@code result}.
+ * @param root {@code true} if the power is 1/n instead of n.
+ */
+ private Unit<?> applyConversion(final Unit<?> result, final int n, final
boolean root) {
+ if (result == target) return this;
+ final LinearConverter operation = LinearConverter.pow(toTarget, n,
root);
+ if (result instanceof SystemUnit<?>) {
+ final String symbol = pow(getSymbol(), n, root);
+ if (symbol != null) {
+ return new ConventionalUnit<>((SystemUnit<?>) result,
operation, symbol, (byte) 0, (short) 0).unique(symbol);
+ }
+ }
+ return result.transform(operation);
}
/**
diff --git
a/core/sis-utility/src/main/java/org/apache/sis/measure/UnitFormat.java
b/core/sis-utility/src/main/java/org/apache/sis/measure/UnitFormat.java
index 31f52b0..42e808a 100644
--- a/core/sis-utility/src/main/java/org/apache/sis/measure/UnitFormat.java
+++ b/core/sis-utility/src/main/java/org/apache/sis/measure/UnitFormat.java
@@ -258,6 +258,7 @@ public class UnitFormat extends Format implements
javax.measure.format.UnitForma
/**
* Symbols or names to use for formatting units in replacement to the
default unit symbols or names.
+ * The {@link Unit} instances are the ones specified by user in calls to
{@link #label(Unit, String)}.
*
* @see #label(Unit, String)
*/
@@ -265,7 +266,8 @@ public class UnitFormat extends Format implements
javax.measure.format.UnitForma
/**
* Units associated to a given label (in addition to the system-wide
{@link UnitRegistry}).
- * This map is the converse of {@link #unitToLabel}.
+ * This map is the converse of {@link #unitToLabel}. The {@link Unit}
instances may differ from the ones
+ * specified by user since {@link AbstractUnit#symbol} may have been set
to the label specified by the user.
*
* @see #label(Unit, String)
*/
@@ -385,10 +387,10 @@ public class UnitFormat extends Format implements
javax.measure.format.UnitForma
* <div class="section">Restriction on character set</div>
* Current implementation accepts only {@linkplain Character#isLetter(int)
letters},
* {@linkplain Characters#isSubScript(int) subscripts}, {@linkplain
Character#isSpaceChar(int) spaces}
- * (including non-breaking spaces but <strong>not</strong> CR/LF
characters), the degree sign (°) and
- * a few other characters like underscore,
- * but the set of legal characters may be expanded in future Apache SIS
versions.
- * However the following restrictions are likely to remain:
+ * (including non-breaking spaces but not CR/LF characters),
+ * the degree sign (°) and a few other characters like underscore.
+ * The set of legal characters may be expanded in future Apache SIS
versions,
+ * but the following restrictions are likely to remain:
*
* <ul>
* <li>The following characters are reserved since they have special
meaning in UCUM format, in URI
@@ -412,9 +414,13 @@ public class UnitFormat extends Format implements
javax.measure.format.UnitForma
}
i += Character.charCount(c);
}
+ Unit<?> labeledUnit = unit;
+ if (labeledUnit instanceof ConventionalUnit<?>) {
+ labeledUnit = ((ConventionalUnit<?>) labeledUnit).forSymbol(label);
+ }
final Unit<?> unitForOldLabel =
labelToUnit.remove(unitToLabel.put(unit, label));
- final Unit<?> oldUnitForLabel = labelToUnit.put(label, unit);
- if (oldUnitForLabel != null && !oldUnitForLabel.equals(unit) &&
!label.equals(unitToLabel.remove(oldUnitForLabel))) {
+ final Unit<?> oldUnitForLabel = labelToUnit.put(label, labeledUnit);
+ if (oldUnitForLabel != null && !oldUnitForLabel.equals(labeledUnit) &&
!label.equals(unitToLabel.remove(oldUnitForLabel))) {
/*
* Assuming there is no bug in our algorithm, this exception
should never happen
* unless this UnitFormat has been modified concurrently in
another thread.
@@ -904,7 +910,7 @@ public class UnitFormat extends Format implements
javax.measure.format.UnitForma
*/
private static int exponentOperator(final CharSequence symbols, int i,
final int length) {
if (i >= 0 && ++i < length) {
- final char c = symbols.charAt(i);
+ final char c = symbols.charAt(i); // No need for code
point because next conditions are true only in BMP.
if (c == Style.EXPONENT_OR_MULTIPLY) {
return 1; // "**" operator: need
to skip one character after '*'.
}
@@ -929,22 +935,33 @@ public class UnitFormat extends Format implements
javax.measure.format.UnitForma
/**
* Returns {@code true} if the given character is a digit in the sense of
the {@code UnitFormat} parser.
* Note that "digit" is taken here in a much more restrictive way than
{@link Character#isDigit(int)}.
+ *
+ * <p>A return value of {@code true} guarantees that the given character
is in the Basic Multilingual Plane (BMP).
+ * Consequently the {@code c} argument value does not need to be the
result of {@link String#codePointAt(int)};
+ * the result of {@link String#charAt(int)} is sufficient. We nevertheless
use the {@code int} type for avoiding
+ * the need to cast if caller uses code points for another reason.</p>
+ *
+ * @see Character#isBmpCodePoint(int)
*/
- private static boolean isDigit(final char c) {
+ private static boolean isDigit(final int c) {
return c >= '0' && c <= '9';
}
/**
* Returns {@code true} if the given character is the sign of a number
according the {@code UnitFormat} parser.
+ * A return value of {@code true} guarantees that the given character is
in the Basic Multilingual Plane (BMP).
+ * Consequently the {@code c} argument value does not need to be the
result of {@link String#codePointAt(int)}.
*/
- private static boolean isSign(final char c) {
+ private static boolean isSign(final int c) {
return c == '+' || c == '-';
}
/**
* Returns {@code true} if the given character is the sign of a division
operator.
+ * A return value of {@code true} guarantees that the given character is
in the Basic Multilingual Plane (BMP).
+ * Consequently the {@code c} argument value does not need to be the
result of {@link String#codePointAt(int)}.
*/
- private static boolean isDivisor(final char c) {
+ private static boolean isDivisor(final int c) {
return c == '/' || c == AbstractUnit.DIVIDE;
}
@@ -1367,7 +1384,7 @@ search: while ((i =
CharSequences.skipTrailingWhitespaces(symbols, start, i)
*
* If the last character is a super-script, then we assume
a notation like "10⁻⁴".
*/
- final char c = uom.charAt(0);
+ final char c = uom.charAt(0); // No need for code
point because next condition is true only for BMP.
if (isDigit(c) || isSign(c)) {
final double multiplier;
try {
@@ -1397,18 +1414,22 @@ search: while ((i =
CharSequences.skipTrailingWhitespaces(symbols, start, i)
* implementation) or a number parseable with
Integer.parseInt(String).
*/
Fraction power = null;
- int i = length;
- char c = uom.charAt(--i);
+ int i = length;
+ int c = uom.codePointBefore(i);
+ i -= Character.charCount(c);
if (Characters.isSuperScript(c)) {
c = Characters.toNormalScript(c);
if (isDigit(c)) {
power = new Fraction(c - '0', 1);
}
} else if (isDigit(c)) {
- do {
- c = uom.charAt(--i);
- if (!isDigit(c) && !isDivisor(c)) {
- if (!isSign(c)) i++;
+ while (i != 0) {
+ c = uom.codePointBefore(i);
+ final boolean isExponent = isDigit(c) ||
isDivisor(c);
+ if (isExponent || isSign(c)) {
+ i -= Character.charCount(c);
+ }
+ if (!isExponent) {
try {
power = new Fraction(uom.substring(i));
} catch (NumberFormatException e) {
@@ -1418,7 +1439,7 @@ search: while ((i =
CharSequences.skipTrailingWhitespaces(symbols, start, i)
}
break;
}
- } while (i != 0);
+ }
}
if (power != null) {
/*
@@ -1427,6 +1448,7 @@ search: while ((i =
CharSequences.skipTrailingWhitespaces(symbols, start, i)
*/
i = CharSequences.skipTrailingWhitespaces(uom, 0, i);
if (i != 0) {
+ // No need for code point because next conditions
are true only in BMP.
switch (uom.charAt(i-1)) {
case Style.EXPONENT_OR_MULTIPLY: {
if (i != 1 && uom.charAt(i-2) ==
Style.EXPONENT_OR_MULTIPLY) i--;
@@ -1438,7 +1460,11 @@ search: while ((i =
CharSequences.skipTrailingWhitespaces(symbols, start, i)
}
}
}
- unit =
Prefixes.getUnit(uom.substring(CharSequences.skipLeadingWhitespaces(uom, 0, i),
i));
+ final String symbol =
uom.substring(CharSequences.skipLeadingWhitespaces(uom, 0, i), i);
+ unit = labelToUnit.get(symbol);
+ if (unit == null) {
+ unit = Prefixes.getUnit(symbol);
+ }
if (unit != null) {
int numerator = power.numerator;
int denominator = power.denominator;
@@ -1485,7 +1511,7 @@ search: while ((i =
CharSequences.skipTrailingWhitespaces(symbols, start, i)
// Example: "10⁻⁴". Split in base and exponent.
final StringBuilder buffer = new StringBuilder(s);
do {
- buffer.append(Characters.toNormalScript((char) c)); // This
API does not support code points yet.
+ buffer.appendCodePoint(Characters.toNormalScript(c));
if ((s -= Character.charCount(c)) <= 0) break;
c = term.codePointBefore(s);
} while (Characters.isSuperScript(c));
diff --git a/core/sis-utility/src/main/java/org/apache/sis/util/Characters.java
b/core/sis-utility/src/main/java/org/apache/sis/util/Characters.java
index 102a92e..17fb12c 100644
--- a/core/sis-utility/src/main/java/org/apache/sis/util/Characters.java
+++ b/core/sis-utility/src/main/java/org/apache/sis/util/Characters.java
@@ -27,7 +27,7 @@ import org.apache.sis.util.resources.Errors;
* symbols. For those symbols, constants are declared in this class.
*
* @author Martin Desruisseaux (Geomatys)
- * @version 0.6
+ * @version 1.0
* @since 0.3
* @module
*/
@@ -250,6 +250,20 @@ public final class Characters extends Static {
* given character was not a superscript or a subscript.
*/
public static char toNormalScript(char c) {
+ // Cast is safe because all return values are in the Basic
Multilingual Plane (BMP).
+ return (char) toNormalScript((int) c);
+ }
+
+ /**
+ * Converts the given code point to normal script.
+ *
+ * @param c the character to convert.
+ * @return the given character as a normal script, or {@code c} if the
+ * given character was not a superscript or a subscript.
+ *
+ * @since 1.0
+ */
+ public static int toNormalScript(int c) {
switch (c) {
case '\u2071': // Exceptions to the default case. They would be
the ¹²³
case '\u2072': // cases if they were not defined in the Latin-1
range.
diff --git
a/core/sis-utility/src/main/java/org/apache/sis/util/resources/Errors.java
b/core/sis-utility/src/main/java/org/apache/sis/util/resources/Errors.java
index b0cfc8b..365cdee 100644
--- a/core/sis-utility/src/main/java/org/apache/sis/util/resources/Errors.java
+++ b/core/sis-utility/src/main/java/org/apache/sis/util/resources/Errors.java
@@ -368,7 +368,7 @@ public final class Errors extends IndexedResourceBundle {
public static final short IllegalCoordinateSystem_1 = 51;
/**
- * The “{1}” pattern can not be applied to formating of objects of
type ‘{0}’.
+ * The “{1}” pattern can not be applied to formatting of objects of
type ‘{0}’.
*/
public static final short IllegalFormatPatternForClass_2 = 52;
diff --git
a/core/sis-utility/src/test/java/org/apache/sis/measure/UnitFormatTest.java
b/core/sis-utility/src/test/java/org/apache/sis/measure/UnitFormatTest.java
index e32ef1a..6ba7095 100644
--- a/core/sis-utility/src/test/java/org/apache/sis/measure/UnitFormatTest.java
+++ b/core/sis-utility/src/test/java/org/apache/sis/measure/UnitFormatTest.java
@@ -22,7 +22,9 @@ import java.util.Locale;
import java.text.ParsePosition;
import java.lang.reflect.Field;
import javax.measure.Unit;
+import javax.measure.quantity.Length;
import javax.measure.format.ParserException;
+import org.apache.sis.util.ComparisonMode;
import org.apache.sis.util.Characters;
import org.apache.sis.test.DependsOn;
import org.apache.sis.test.DependsOnMethod;
@@ -280,6 +282,18 @@ public final strictfp class UnitFormatTest extends
TestCase {
}
/**
+ * Tests formatting of units raised to some powers.
+ */
+ @Test
+ public void testFormatPower() {
+ final UnitFormat f = new UnitFormat(Locale.UK);
+ f.setStyle(UnitFormat.Style.SYMBOL);
+ assertEquals("m²", f.format(Units.METRE .pow(2)));
+ assertEquals("cm²", f.format(Units.CENTIMETRE.pow(2)));
+ assertEquals("in²", f.format(Units.INCH .pow(2)));
+ }
+
+ /**
* Tests formatting of some more unusual units. The units tested by this
method are artificial
* and somewhat convolved. The intent is to verify that unit formatting is
still robust.
*/
@@ -489,10 +503,10 @@ public final strictfp class UnitFormatTest extends
TestCase {
final UnitFormat f = new UnitFormat(Locale.UK);
/*
* Kilograms should be identified even if they appear in an expression.
- * Current implementation creates a symbol early when it detect such
case.
+ * Current implementation creates a symbol early when it detects such
case.
*/
assertEquals("mg∕m", f.parse("10^-6.kg/m").getSymbol());
-// assertEquals("μg∕m³", f.parse("μg.m-3").getSymbol());
+ assertEquals("µg∕m³", f.parse("μg.m-3").getSymbol());
}
/**
@@ -532,7 +546,7 @@ public final strictfp class UnitFormatTest extends TestCase
{
public void testParsePosition() {
final UnitFormat f = new UnitFormat(Locale.UK);
final ParsePosition pos = new ParsePosition(4);
- assertSame(Units.CENTIMETRE, f.parse("ABC cm DEF", pos));
+ assertSame(Units.CENTIMETRE, f.parse("ABC cm foo", pos));
assertEquals("ParsePosition.getIndex()", 6, pos.getIndex());
assertEquals("ParsePosition.getErrorIndex()", -1, pos.getErrorIndex());
/*
@@ -540,8 +554,8 @@ public final strictfp class UnitFormatTest extends TestCase
{
* We associate a random unit to that label, just for testing purpose.
*/
pos.setIndex(4);
- f.label(Units.HECTARE, "cm DEF");
- assertSame(Units.HECTARE, f.parse("ABC cm DEF", pos));
+ f.label(Units.HECTARE, "cm foo");
+ assertEqualsIgnoreSymbol(Units.HECTARE, f.parse("ABC cm foo", pos));
assertEquals("ParsePosition.getIndex()", 10, pos.getIndex());
assertEquals("ParsePosition.getErrorIndex()", -1, pos.getErrorIndex());
}
@@ -625,6 +639,23 @@ public final strictfp class UnitFormatTest extends
TestCase {
}
/**
+ * Tests parsing and formatting of custom symbol.
+ */
+ @Test
+ @DependsOnMethod({"testLabel", "testParseExponentiation"})
+ public void testParseAndFormatLabel() {
+ final Unit<Length> yard = Units.METRE.multiply(0.9144);
+ final Unit<?> yard2 = yard.pow(2);
+ final UnitFormat f = new UnitFormat(Locale.ENGLISH);
+ f.label(yard, "yd");
+ roundtrip(f, "yd", "yd", yard);
+ roundtrip(f, "yd**2", "yd²", yard2);
+ roundtrip(f, "yd^2", "yd²", yard2);
+ roundtrip(f, "yd2", "yd²", yard2);
+ roundtrip(f, "yd²", "yd²", yard2);
+ }
+
+ /**
* Reminder for units parsing and formatting that still need improvement.
* The "expected" values checked in this method are not really what we
expect,
* but they reflect the current behavior of Apache SIS units library. We
keep
@@ -659,4 +690,21 @@ public final strictfp class UnitFormatTest extends
TestCase {
final String actual = f.format(unit);
assertEquals(expected, actual);
}
+
+ /**
+ * Same as {@link #roundtrip(UnitFormat, String, String)}, but also
compares the parsed unit with the given reference unit, ignoring symbol.
+ */
+ private static void roundtrip(final UnitFormat f, final String symbol,
final String expected, final Unit<?> reference) {
+ final Unit<?> unit = f.parse(symbol);
+ assertEqualsIgnoreSymbol(reference, unit);
+ final String actual = f.format(unit);
+ assertEquals(expected, actual);
+ }
+
+ /**
+ * Asserts that the given units are equal, ignoring symbol.
+ */
+ private static void assertEqualsIgnoreSymbol(final Unit<?> actual, final
Unit<?> expected) {
+ assertTrue(((AbstractUnit<?>) expected).equals(actual,
ComparisonMode.DEBUG));
+ }
}