// Source: http://git-wip-us.apache.org/repos/asf/commons-text/blob/4d927351/src/main/java/org/apache/commons/text/beta/AlphabetConverter.java
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.text.beta;

import java.io.UnsupportedEncodingException;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Objects;
import java.util.Set;

/**
 * <p>
 * Convert from one alphabet to another, with the possibility of leaving certain characters unencoded.
 * </p>
 *
 * <p>
 * The target and 'do not encode' alphabets must be in the Unicode BMP, but the source alphabet need not be.
 * </p>
 *
 * <p>
 * The encoding will all be of a fixed length, except for the 'do not encode' chars, which will be of length 1
 * </p>
 *
 * <h3>Sample usage</h3>
 *
 * <pre>
 * Character[] originals; // a, b, c, d
 * Character[] encoding; // 0, 1, d
 * Character[] doNotEncode; // d
 *
 * AlphabetConverter ac = AlphabetConverter.createConverterFromChars(originals, encoding, doNotEncode);
 *
 * ac.encode("a"); // 00
 * ac.encode("b"); // 01
 * ac.encode("c"); // 0d
 * ac.encode("d"); // d
 * ac.encode("abcd"); // 00010dd
 * </pre>
 *
 * <p>
 * #ThreadSafe# AlphabetConverter class methods are threadsafe as they do not change internal state.
 * </p>
 *
 * @since 1.0
 *
 */
public final class AlphabetConverter {

    /**
     * Maps each code point of the original alphabet to its encoded string.
     */
    private final Map<Integer, String> originalToEncoded;
    /**
     * Maps each encoded string back to the original letter (held as a string).
     */
    private final Map<String, String> encodedToOriginal;
    /**
     * Length of the encoded letter.
     */
    private final int encodedLetterLength;
    /**
     * Arrow constant, used for converting the object into a string.
     */
    private static final String ARROW = " -> ";
    /**
     * Line separator, used for converting the object into a string.
     */
    private static final String LINE_SEPARATOR = System.getProperty("line.separator");

    /**
     * Hidden constructor for alphabet converter. Used by static helper methods.
     *
     * @param originalToEncoded map from original code point to encoded string
     * @param encodedToOriginal map from encoded string to original letter
     * @param encodedLetterLength length of the encoded letter
     */
    private AlphabetConverter(final Map<Integer, String> originalToEncoded, final Map<String, String> encodedToOriginal,
            final int encodedLetterLength) {

        this.originalToEncoded = originalToEncoded;
        this.encodedToOriginal = encodedToOriginal;
        this.encodedLetterLength = encodedLetterLength;
    }

    /**
     * Encode a given string.
     *
     * @param original the string to be encoded
     * @return the encoded string, {@code null} if the given string is null
     * @throws UnsupportedEncodingException if chars that are not supported are encountered
     */
    public String encode(final String original) throws UnsupportedEncodingException {
        if (original == null) {
            return null;
        }

        final StringBuilder sb = new StringBuilder();

        // Walk by code point (not by char) so supplementary characters in the source are handled.
        for (int i = 0; i < original.length();) {
            final int codepoint = original.codePointAt(i);

            final String nextLetter = originalToEncoded.get(codepoint);

            if (nextLetter == null) {
                throw new UnsupportedEncodingException(
                        "Couldn't find encoding for '" + codePointToString(codepoint) + "' in " + original);
            }

            sb.append(nextLetter);

            i += Character.charCount(codepoint);
        }

        return sb.toString();
    }

    /**
     * Decode a given string.
     *
     * @param encoded a string that has been encoded using this AlphabetConverter
     * @return the decoded string, {@code null} if the given string is null
     * @throws UnsupportedEncodingException if unexpected characters that cannot be handled are encountered
     */
    public String decode(final String encoded) throws UnsupportedEncodingException {
        if (encoded == null) {
            return null;
        }

        final StringBuilder result = new StringBuilder();

        for (int j = 0; j < encoded.length();) {
            final int codePoint = encoded.codePointAt(j);
            final String s = codePointToString(codePoint);

            // A letter that maps to itself is a 'do not encode' letter and occupies a single char.
            if (s.equals(originalToEncoded.get(codePoint))) {
                result.append(s);
                j++; // because we do not encode in Unicode extended the length of each encoded char is 1
            } else {
                if (j + encodedLetterLength > encoded.length()) {
                    throw new UnsupportedEncodingException("Unexpected end of string while decoding " + encoded);
                }
                final String nextGroup = encoded.substring(j, j + encodedLetterLength);
                final String next = encodedToOriginal.get(nextGroup);
                if (next == null) {
                    throw new UnsupportedEncodingException(
                            "Unexpected string without decoding (" + nextGroup + ") in " + encoded);
                }
                result.append(next);
                j += encodedLetterLength;
            }
        }

        return result.toString();
    }

    /**
     * Get the length of characters in the encoded alphabet that are necessary for each character in the original
     * alphabet.
     *
     * @return the length of the encoded char
     */
    public int getEncodedCharLength() {
        return encodedLetterLength;
    }

    /**
     * Get the mapping from integer code point of source language to encoded string. Use to reconstruct converter from
     * serialized map.
     *
     * @return an unmodifiable view of the original-to-encoded map
     */
    public Map<Integer, String> getOriginalToEncoded() {
        return Collections.unmodifiableMap(originalToEncoded);
    }

    /**
     * Recursive method used when creating encoder/decoder.
     *
     * @param level at which point it should add a single encoding
     * @param currentEncoding current encoding
     * @param encoding letters encoding
     * @param originals original values
     * @param doNotEncodeMap map of values that should not be encoded
     */
    @SuppressWarnings("PMD")
    private void addSingleEncoding(final int level, final String currentEncoding, final Collection<Integer> encoding,
            final Iterator<Integer> originals, final Map<Integer, String> doNotEncodeMap) {

        if (level > 0) {
            for (final int encodingLetter : encoding) {
                if (originals.hasNext()) {

                    // this skips the doNotEncode chars if they are in the
                    // leftmost place
                    if (level != encodedLetterLength || !doNotEncodeMap.containsKey(encodingLetter)) {
                        addSingleEncoding(level - 1, currentEncoding + codePointToString(encodingLetter), encoding,
                                originals, doNotEncodeMap);
                    }
                } else {
                    return; // done encoding all the original alphabet
                }
            }
        } else {
            Integer next = originals.next();

            // 'do not encode' letters map to themselves and do not consume the current encoding.
            while (doNotEncodeMap.containsKey(next)) {
                final String originalLetterAsString = codePointToString(next);

                originalToEncoded.put(next, originalLetterAsString);
                encodedToOriginal.put(originalLetterAsString, originalLetterAsString);

                if (!originals.hasNext()) {
                    return;
                }

                next = originals.next();
            }

            final String originalLetterAsString = codePointToString(next);

            originalToEncoded.put(next, currentEncoding);
            encodedToOriginal.put(currentEncoding, originalLetterAsString);
        }
    }

    /**
     * Lists every mapping as {@code original -> encoded}, one per line.
     *
     * @return the textual form of the original-to-encoded map
     */
    @Override
    public String toString() {
        final StringBuilder sb = new StringBuilder();

        for (final Entry<Integer, String> entry : originalToEncoded.entrySet()) {
            sb.append(codePointToString(entry.getKey())).append(ARROW).append(entry.getValue()).append(LINE_SEPARATOR);
        }

        return sb.toString();
    }

    /**
     * Two converters are equal when both maps and the encoded letter length are equal.
     *
     * @param obj the object to compare with
     * @return {@code true} if {@code obj} is an equal AlphabetConverter
     */
    @Override
    public boolean equals(final Object obj) {
        if (obj == null) {
            return false;
        }
        if (obj == this) {
            return true;
        }
        if (!(obj instanceof AlphabetConverter)) {
            return false;
        }
        final AlphabetConverter other = (AlphabetConverter) obj;
        return originalToEncoded.equals(other.originalToEncoded) && encodedToOriginal.equals(other.encodedToOriginal)
                && encodedLetterLength == other.encodedLetterLength;
    }

    /**
     * Hash code computed from the same three fields that {@link #equals(Object)} compares.
     *
     * @return the hash code
     */
    @Override
    public int hashCode() {
        return Objects.hash(originalToEncoded, encodedToOriginal, encodedLetterLength);
    }

    // -- static methods

    /**
     * Create a new converter from a map.
     *
     * <p>The given map is copied, so later changes to it do not affect the returned converter.</p>
     *
     * @param originalToEncoded a map returned from getOriginalToEncoded()
     * @return the reconstructed AlphabetConverter
     * @see AlphabetConverter#getOriginalToEncoded()
     */
    public static AlphabetConverter createConverterFromMap(final Map<Integer, String> originalToEncoded) {
        // Defensive copy: an unmodifiable wrapper alone is only a view, so mutations of the caller's map
        // would otherwise leak into this (supposedly immutable) converter. LinkedHashMap keeps the order.
        final Map<Integer, String> copy = new LinkedHashMap<>(originalToEncoded);
        final Map<Integer, String> unmodifiableOriginalToEncoded = Collections.unmodifiableMap(copy);
        final Map<String, String> encodedToOriginal = new LinkedHashMap<>();

        int encodedLetterLength = 1;

        for (final Entry<Integer, String> e : unmodifiableOriginalToEncoded.entrySet()) {
            final String originalAsString = codePointToString(e.getKey());
            encodedToOriginal.put(e.getValue(), originalAsString);

            // The encoded letter length is the length of the longest encoded value.
            if (e.getValue().length() > encodedLetterLength) {
                encodedLetterLength = e.getValue().length();
            }
        }

        return new AlphabetConverter(unmodifiableOriginalToEncoded, encodedToOriginal, encodedLetterLength);
    }

    /**
     * Create an alphabet converter, for converting from the original alphabet, to the encoded alphabet, while leaving
     * the characters in <em>doNotEncode</em> as they are (if possible).
     *
     * <p>Duplicate letters in either original or encoding will be ignored.</p>
     *
     * @param original an array of chars representing the original alphabet
     * @param encoding an array of chars representing the alphabet to be used for encoding
     * @param doNotEncode an array of chars to be encoded using the original alphabet - every char here must appear in
     *            both the previous params
     * @return the AlphabetConverter
     * @throws IllegalArgumentException if an AlphabetConverter cannot be constructed
     */
    public static AlphabetConverter createConverterFromChars(final Character[] original, final Character[] encoding,
            final Character[] doNotEncode) {
        return AlphabetConverter.createConverter(convertCharsToIntegers(original), convertCharsToIntegers(encoding),
                convertCharsToIntegers(doNotEncode));
    }

    /**
     * Convert characters to integers.
     *
     * @param chars array of characters, may be {@code null} or empty
     * @return an equivalent array of integers, empty if {@code chars} is {@code null} or empty
     */
    private static Integer[] convertCharsToIntegers(final Character[] chars) {
        if (chars == null || chars.length == 0) {
            return new Integer[0];
        }
        final Integer[] integers = new Integer[chars.length];
        for (int i = 0; i < chars.length; i++) {
            integers[i] = (int) chars[i];
        }
        return integers;
    }

    /**
     * Create an alphabet converter, for converting from the original alphabet, to the encoded alphabet, while leaving
     * the characters in <em>doNotEncode</em> as they are (if possible).
     *
     * <p>Duplicate letters in either original or encoding will be ignored.</p>
     *
     * @param original an array of ints representing the original alphabet in codepoints
     * @param encoding an array of ints representing the alphabet to be used for encoding, in codepoints
     * @param doNotEncode an array of ints representing the chars to be encoded using the original alphabet - every char
     *            here must appear in both the previous params
     * @return the AlphabetConverter
     * @throws IllegalArgumentException if an AlphabetConverter cannot be constructed
     */
    public static AlphabetConverter createConverter(final Integer[] original, final Integer[] encoding,
            final Integer[] doNotEncode) {

        // LinkedHashSet drops duplicates while keeping the caller's ordering.
        final Set<Integer> originalCopy = new LinkedHashSet<>(Arrays.<Integer>asList(original));
        final Set<Integer> encodingCopy = new LinkedHashSet<>(Arrays.<Integer>asList(encoding));
        final Set<Integer> doNotEncodeCopy = new LinkedHashSet<>(Arrays.<Integer>asList(doNotEncode));

        final Map<Integer, String> originalToEncoded = new LinkedHashMap<>();
        final Map<String, String> encodedToOriginal = new LinkedHashMap<>();
        final Map<Integer, String> doNotEncodeMap = new HashMap<>();

        int encodedLetterLength;

        for (final int i : doNotEncodeCopy) {
            if (!originalCopy.contains(i)) {
                throw new IllegalArgumentException(
                        "Can not use 'do not encode' list because original alphabet does not contain '"
                                + codePointToString(i) + "'");
            }

            if (!encodingCopy.contains(i)) {
                throw new IllegalArgumentException(
                        "Can not use 'do not encode' list because encoding alphabet does not contain '"
                                + codePointToString(i) + "'");
            }

            doNotEncodeMap.put(i, codePointToString(i));
        }

        if (encodingCopy.size() >= originalCopy.size()) {
            // Enough encoding letters for a one-to-one mapping.
            encodedLetterLength = 1;

            final Iterator<Integer> it = encodingCopy.iterator();

            for (final int originalLetter : originalCopy) {
                final String originalLetterAsString = codePointToString(originalLetter);

                if (doNotEncodeMap.containsKey(originalLetter)) {
                    originalToEncoded.put(originalLetter, originalLetterAsString);
                    encodedToOriginal.put(originalLetterAsString, originalLetterAsString);
                } else {
                    Integer next = it.next();

                    // 'do not encode' letters are reserved for themselves; skip them as encodings.
                    while (doNotEncodeCopy.contains(next)) {
                        next = it.next();
                    }

                    final String encodedLetter = codePointToString(next);

                    originalToEncoded.put(originalLetter, encodedLetter);
                    encodedToOriginal.put(encodedLetter, originalLetterAsString);
                }
            }

            return new AlphabetConverter(originalToEncoded, encodedToOriginal, encodedLetterLength);

        } else if (encodingCopy.size() - doNotEncodeCopy.size() < 2) {
            throw new IllegalArgumentException(
                    "Must have at least two encoding characters (excluding those in the 'do not encode' list), but has "
                            + (encodingCopy.size() - doNotEncodeCopy.size()));
        } else {
            // we start with one which is our minimum, and because we do the
            // first division outside the loop
            int lettersSoFar = 1;

            // the first division takes into account that the doNotEncode
            // letters can't be in the leftmost place
            int lettersLeft = (originalCopy.size() - doNotEncodeCopy.size())
                    / (encodingCopy.size() - doNotEncodeCopy.size());

            while (lettersLeft / encodingCopy.size() >= 1) {
                lettersLeft = lettersLeft / encodingCopy.size();
                lettersSoFar++;
            }

            encodedLetterLength = lettersSoFar + 1;

            final AlphabetConverter ac = new AlphabetConverter(originalToEncoded, encodedToOriginal,
                    encodedLetterLength);

            // Fills both maps in place by enumerating fixed-length encodings.
            ac.addSingleEncoding(encodedLetterLength, "", encodingCopy, originalCopy.iterator(), doNotEncodeMap);

            return ac;
        }
    }

    /**
     * Create new String that contains just the given code point.
     *
     * @param i code point
     * @return a new string with the new code point
     * @see "http://www.oracle.com/us/technologies/java/supplementary-142654.html"
     */
    private static String codePointToString(final int i) {
        if (Character.charCount(i) == 1) {
            return String.valueOf((char) i);
        }
        return new String(Character.toChars(i));
    }
}
http://git-wip-us.apache.org/repos/asf/commons-text/blob/4d927351/src/main/java/org/apache/commons/text/beta/Builder.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/text/beta/Builder.java b/src/main/java/org/apache/commons/text/beta/Builder.java new file mode 100644 index 0000000..c789cbc --- /dev/null +++ b/src/main/java/org/apache/commons/text/beta/Builder.java @@ -0,0 +1,88 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.text.beta; + +/** + * <p> + * The Builder interface is designed to designate a class as a <em>builder</em> + * object in the Builder design pattern. Builders are capable of creating and + * configuring objects or results that normally take multiple steps to construct + * or are very complex to derive. + * </p> + * + * <p> + * The builder interface defines a single method, {@link #build()}, that + * classes must implement. The result of this method should be the final + * configured object or result after all building operations are performed. 
+ * </p> + * + * <p> + * It is a recommended practice that the methods supplied to configure the + * object or result being built return a reference to {@code this} so that + * method calls can be chained together. + * </p> + * + * <p> + * Example Builder: + * <pre><code> + * class FontBuilder implements Builder&lt;Font&gt; { + * private Font font; + * + * public FontBuilder(String fontName) { + * this.font = new Font(fontName, Font.PLAIN, 12); + * } + * + * public FontBuilder bold() { + * this.font = this.font.deriveFont(Font.BOLD); + * return this; // Reference returned so calls can be chained + * } + * + * public FontBuilder size(float pointSize) { + * this.font = this.font.deriveFont(pointSize); + * return this; // Reference returned so calls can be chained + * } + * + * // Other Font construction methods + * + * public Font build() { + * return this.font; + * } + * } + * </code></pre> + * + * Example Builder Usage: + * <pre><code> + * Font bold14ptSansSerifFont = new FontBuilder(Font.SANS_SERIF).bold() + * .size(14.0f) + * .build(); + * </code></pre> + * + * + * @param <T> the type of object that the builder will construct or compute. + * @since 1.0 + * + */ +public interface Builder<T> { + + /** + * Returns a reference to the object being constructed or result being + * calculated by the builder. + * + * @return the object constructed or result calculated by the builder. 
+ */ + T build(); +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/commons-text/blob/4d927351/src/main/java/org/apache/commons/text/beta/CharacterPredicate.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/text/beta/CharacterPredicate.java b/src/main/java/org/apache/commons/text/beta/CharacterPredicate.java new file mode 100644 index 0000000..ddf72c8 --- /dev/null +++ b/src/main/java/org/apache/commons/text/beta/CharacterPredicate.java @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.text.beta; + +/** + * A predicate for selecting code points. Implementations of this interface must + * be thread safe. + * + * @since 1.0 + */ +public interface CharacterPredicate { + + /** + * Tests the code point with this predicate. 
+ * + * @param codePoint + * the code point to test + * @return {@code true} if the code point matches the predicate, + * {@code false} otherwise + * @since 1.0 + */ + boolean test(int codePoint); +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/commons-text/blob/4d927351/src/main/java/org/apache/commons/text/beta/CharacterPredicates.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/text/beta/CharacterPredicates.java b/src/main/java/org/apache/commons/text/beta/CharacterPredicates.java new file mode 100644 index 0000000..097de09 --- /dev/null +++ b/src/main/java/org/apache/commons/text/beta/CharacterPredicates.java @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.text.beta; + +/** + * <p> + * Commonly used implementations of {@link CharacterPredicate}. Per the interface + * requirements, all implementations are thread safe. 
+ * </p> + * + * @since 1.0 + */ +public enum CharacterPredicates implements CharacterPredicate { + + /** + * Tests code points against {@link Character#isLetter(int)}. + * + * @since 1.0 + */ + LETTERS { + @Override + public boolean test(int codePoint) { + return Character.isLetter(codePoint); + } + }, + + /** + * Tests code points against {@link Character#isDigit(int)}. + * + * @since 1.0 + */ + DIGITS { + @Override + public boolean test(int codePoint) { + return Character.isDigit(codePoint); + } + } +} http://git-wip-us.apache.org/repos/asf/commons-text/blob/4d927351/src/main/java/org/apache/commons/text/beta/CompositeFormat.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/text/beta/CompositeFormat.java b/src/main/java/org/apache/commons/text/beta/CompositeFormat.java new file mode 100644 index 0000000..f44219d --- /dev/null +++ b/src/main/java/org/apache/commons/text/beta/CompositeFormat.java @@ -0,0 +1,116 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.commons.text.beta; + +import java.text.FieldPosition; +import java.text.Format; +import java.text.ParseException; +import java.text.ParsePosition; + +/** + * Formats using one formatter and parses using a different formatter. An + * example of use for this would be a webapp where data is taken in one way and + * stored in a database another way. + * + * @since 1.0 + */ +public class CompositeFormat extends Format { + + /** + * Required for serialization support. + * + * @see java.io.Serializable + */ + private static final long serialVersionUID = -4329119827877627683L; + + /** The parser to use. */ + private final Format parser; + /** The formatter to use. */ + private final Format formatter; + + /** + * Create a format that points its parseObject method to one implementation + * and its format method to another. + * + * @param parser implementation + * @param formatter implementation + */ + public CompositeFormat(final Format parser, final Format formatter) { + this.parser = parser; + this.formatter = formatter; + } + + /** + * Uses the formatter Format instance. + * + * @param obj the object to format + * @param toAppendTo the {@link StringBuffer} to append to + * @param pos the FieldPosition to use (or ignore). + * @return <code>toAppendTo</code> + * @see Format#format(Object, StringBuffer, FieldPosition) + */ + @Override // Therefore has to use StringBuffer + public StringBuffer format(final Object obj, final StringBuffer toAppendTo, + final FieldPosition pos) { + return formatter.format(obj, toAppendTo, pos); + } + + /** + * Uses the parser Format instance. 
+ * + * @param source the String source + * @param pos the ParsePosition containing the position to parse from, will + * be updated according to parsing success (index) or failure + * (error index) + * @return the parsed Object + * @see Format#parseObject(String, ParsePosition) + */ + @Override + public Object parseObject(final String source, final ParsePosition pos) { + return parser.parseObject(source, pos); + } + + /** + * Provides access to the parser Format implementation. + * + * @return parser Format implementation + */ + public Format getParser() { + return this.parser; + } + + /** + * Provides access to the formatter Format implementation. + * + * @return formatter Format implementation + */ + public Format getFormatter() { + return this.formatter; + } + + /** + * Utility method to parse and then reformat a String. + * + * @param input String to reformat + * @return A reformatted String + * @throws ParseException thrown by parseObject(String) call + */ + public String reformat(final String input) throws ParseException { + return format(parseObject(input)); + } + +} http://git-wip-us.apache.org/repos/asf/commons-text/blob/4d927351/src/main/java/org/apache/commons/text/beta/ExtendedMessageFormat.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/text/beta/ExtendedMessageFormat.java b/src/main/java/org/apache/commons/text/beta/ExtendedMessageFormat.java new file mode 100644 index 0000000..09a3e02 --- /dev/null +++ b/src/main/java/org/apache/commons/text/beta/ExtendedMessageFormat.java @@ -0,0 +1,529 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.text.beta; + +import java.text.Format; +import java.text.MessageFormat; +import java.text.ParsePosition; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Iterator; +import java.util.Locale; +import java.util.Map; +import java.util.Objects; + +/** + * Extends <code>java.text.MessageFormat</code> to allow pluggable/additional formatting + * options for embedded format elements. Client code should specify a registry + * of <code>FormatFactory</code> instances associated with <code>String</code> + * format names. This registry will be consulted when the format elements are + * parsed from the message pattern. In this way custom patterns can be specified, + * and the formats supported by <code>java.text.MessageFormat</code> can be overridden + * at the format and/or format style level (see MessageFormat). A "format element" + * embedded in the message pattern is specified (<b>()?</b> signifies optionality):<br> + * <code>{</code><i>argument-number</i><b>(</b><code>,</code><i>format-name</i><b> + * (</b><code>,</code><i>format-style</i><b>)?)?</b><code>}</code> + * + * <p> + * <i>format-name</i> and <i>format-style</i> values are trimmed of surrounding whitespace + * in the manner of <code>java.text.MessageFormat</code>. If <i>format-name</i> denotes + * <code>FormatFactory formatFactoryInstance</code> in <code>registry</code>, a <code>Format</code> + * matching <i>format-name</i> and <i>format-style</i> is requested from + * <code>formatFactoryInstance</code>. 
If this is successful, the <code>Format</code> + * found is used for this format element. + * </p> + * + * <p><b>NOTICE:</b> The various subformat mutator methods are considered unnecessary; they exist on the parent + * class to allow the type of customization which it is the job of this class to provide in + * a configurable fashion. These methods have thus been disabled and will throw + * <code>UnsupportedOperationException</code> if called. + * </p> + * + * <p>Limitations inherited from <code>java.text.MessageFormat</code>:</p> + * <ul> + * <li>When using "choice" subformats, support for nested formatting instructions is limited + * to that provided by the base class.</li> + * <li>Thread-safety of <code>Format</code>s, including <code>MessageFormat</code> and thus + * <code>ExtendedMessageFormat</code>, is not guaranteed.</li> + * </ul> + * + * @since 1.0 + */ +public class ExtendedMessageFormat extends MessageFormat { + private static final long serialVersionUID = -2362048321261811743L; + private static final int HASH_SEED = 31; + + private static final String DUMMY_PATTERN = ""; + private static final char START_FMT = ','; + private static final char END_FE = '}'; + private static final char START_FE = '{'; + private static final char QUOTE = '\''; + + private String toPattern; + private final Map<String, ? extends FormatFactory> registry; + + /** + * Create a new ExtendedMessageFormat for the default locale. + * + * @param pattern the pattern to use, not null + * @throws IllegalArgumentException in case of a bad pattern. + */ + public ExtendedMessageFormat(final String pattern) { + this(pattern, Locale.getDefault()); + } + + /** + * Create a new ExtendedMessageFormat. + * + * @param pattern the pattern to use, not null + * @param locale the locale to use, not null + * @throws IllegalArgumentException in case of a bad pattern. 
+ */ + public ExtendedMessageFormat(final String pattern, final Locale locale) { + this(pattern, locale, null); + } + + /** + * Create a new ExtendedMessageFormat for the default locale. + * + * @param pattern the pattern to use, not null + * @param registry the registry of format factories, may be null + * @throws IllegalArgumentException in case of a bad pattern. + */ + public ExtendedMessageFormat(final String pattern, final Map<String, ? extends FormatFactory> registry) { + this(pattern, Locale.getDefault(), registry); + } + + /** + * Create a new ExtendedMessageFormat. + * + * @param pattern the pattern to use, not null + * @param locale the locale to use, not null + * @param registry the registry of format factories, may be null + * @throws IllegalArgumentException in case of a bad pattern. + */ + public ExtendedMessageFormat(final String pattern, final Locale locale, final Map<String, ? extends FormatFactory> registry) { + super(DUMMY_PATTERN); + setLocale(locale); + this.registry = registry; + applyPattern(pattern); + } + + /** + * {@inheritDoc} + */ + @Override + public String toPattern() { + return toPattern; + } + + /** + * Apply the specified pattern. 
+ * + * @param pattern String + */ + @Override + public final void applyPattern(final String pattern) { + if (registry == null) { + super.applyPattern(pattern); + toPattern = super.toPattern(); + return; + } + final ArrayList<Format> foundFormats = new ArrayList<>(); + final ArrayList<String> foundDescriptions = new ArrayList<>(); + final StringBuilder stripCustom = new StringBuilder(pattern.length()); + + final ParsePosition pos = new ParsePosition(0); + final char[] c = pattern.toCharArray(); + int fmtCount = 0; + while (pos.getIndex() < pattern.length()) { + switch (c[pos.getIndex()]) { + case QUOTE: + appendQuotedString(pattern, pos, stripCustom); + break; + case START_FE: + fmtCount++; + seekNonWs(pattern, pos); + final int start = pos.getIndex(); + final int index = readArgumentIndex(pattern, next(pos)); + stripCustom.append(START_FE).append(index); + seekNonWs(pattern, pos); + Format format = null; + String formatDescription = null; + if (c[pos.getIndex()] == START_FMT) { + formatDescription = parseFormatDescription(pattern, + next(pos)); + format = getFormat(formatDescription); + if (format == null) { + stripCustom.append(START_FMT).append(formatDescription); + } + } + foundFormats.add(format); + foundDescriptions.add(format == null ? 
null : formatDescription); + if(foundFormats.size() != fmtCount) { + throw new IllegalArgumentException("The validated expression is false"); + } + if (foundDescriptions.size() != fmtCount) { + throw new IllegalArgumentException("The validated expression is false"); + } + if (c[pos.getIndex()] != END_FE) { + throw new IllegalArgumentException( + "Unreadable format element at position " + start); + } + //$FALL-THROUGH$ + default: + stripCustom.append(c[pos.getIndex()]); + next(pos); + } + } + super.applyPattern(stripCustom.toString()); + toPattern = insertFormats(super.toPattern(), foundDescriptions); + if (containsElements(foundFormats)) { + final Format[] origFormats = getFormats(); + // only loop over what we know we have, as MessageFormat on Java 1.3 + // seems to provide an extra format element: + int i = 0; + for (final Iterator<Format> it = foundFormats.iterator(); it.hasNext(); i++) { + final Format f = it.next(); + if (f != null) { + origFormats[i] = f; + } + } + super.setFormats(origFormats); + } + } + + /** + * Throws UnsupportedOperationException - see class Javadoc for details. + * + * @param formatElementIndex format element index + * @param newFormat the new format + * @throws UnsupportedOperationException always thrown since this isn't supported by ExtendMessageFormat + */ + @Override + public void setFormat(final int formatElementIndex, final Format newFormat) { + throw new UnsupportedOperationException(); + } + + /** + * Throws UnsupportedOperationException - see class Javadoc for details. + * + * @param argumentIndex argument index + * @param newFormat the new format + * @throws UnsupportedOperationException always thrown since this isn't supported by ExtendMessageFormat + */ + @Override + public void setFormatByArgumentIndex(final int argumentIndex, final Format newFormat) { + throw new UnsupportedOperationException(); + } + + /** + * Throws UnsupportedOperationException - see class Javadoc for details. 
+ * + * @param newFormats new formats + * @throws UnsupportedOperationException always thrown since this isn't supported by ExtendMessageFormat + */ + @Override + public void setFormats(final Format[] newFormats) { + throw new UnsupportedOperationException(); + } + + /** + * Throws UnsupportedOperationException - see class Javadoc for details. + * + * @param newFormats new formats + * @throws UnsupportedOperationException always thrown since this isn't supported by ExtendMessageFormat + */ + @Override + public void setFormatsByArgumentIndex(final Format[] newFormats) { + throw new UnsupportedOperationException(); + } + + /** + * Check if this extended message format is equal to another object. + * + * @param obj the object to compare to + * @return true if this object equals the other, otherwise false + */ + @Override + public boolean equals(final Object obj) { + if (obj == this) { + return true; + } + if (obj == null) { + return false; + } + if (!super.equals(obj)) { + return false; + } + if (!Objects.equals(getClass(), obj.getClass())) { + return false; + } + final ExtendedMessageFormat rhs = (ExtendedMessageFormat)obj; + if (!Objects.equals(toPattern, rhs.toPattern)) { + return false; + } + if (!Objects.equals(registry, rhs.registry)) { + return false; + } + return true; + } + + /** + * {@inheritDoc} + */ + @Override + public int hashCode() { + int result = super.hashCode(); + result = HASH_SEED * result + Objects.hashCode(registry); + result = HASH_SEED * result + Objects.hashCode(toPattern); + return result; + } + + /** + * Get a custom format from a format description. 
+ * + * @param desc String + * @return Format + */ + private Format getFormat(final String desc) { + if (registry != null) { + String name = desc; + String args = null; + final int i = desc.indexOf(START_FMT); + if (i > 0) { + name = desc.substring(0, i).trim(); + args = desc.substring(i + 1).trim(); + } + final FormatFactory factory = registry.get(name); + if (factory != null) { + return factory.getFormat(name, args, getLocale()); + } + } + return null; + } + + /** + * Read the argument index from the current format element + * + * @param pattern pattern to parse + * @param pos current parse position + * @return argument index + */ + private int readArgumentIndex(final String pattern, final ParsePosition pos) { + final int start = pos.getIndex(); + seekNonWs(pattern, pos); + final StringBuilder result = new StringBuilder(); + boolean error = false; + for (; !error && pos.getIndex() < pattern.length(); next(pos)) { + char c = pattern.charAt(pos.getIndex()); + if (Character.isWhitespace(c)) { + seekNonWs(pattern, pos); + c = pattern.charAt(pos.getIndex()); + if (c != START_FMT && c != END_FE) { + error = true; + continue; + } + } + if ((c == START_FMT || c == END_FE) && result.length() > 0) { + try { + return Integer.parseInt(result.toString()); + } catch (final NumberFormatException e) { // NOPMD + // we've already ensured only digits, so unless something + // outlandishly large was specified we should be okay. + } + } + error = !Character.isDigit(c); + result.append(c); + } + if (error) { + throw new IllegalArgumentException( + "Invalid format argument index at position " + start + ": " + + pattern.substring(start, pos.getIndex())); + } + throw new IllegalArgumentException( + "Unterminated format element at position " + start); + } + + /** + * Parse the format component of a format element. 
+ * + * @param pattern string to parse + * @param pos current parse position + * @return Format description String + */ + private String parseFormatDescription(final String pattern, final ParsePosition pos) { + final int start = pos.getIndex(); + seekNonWs(pattern, pos); + final int text = pos.getIndex(); + int depth = 1; + for (; pos.getIndex() < pattern.length(); next(pos)) { + switch (pattern.charAt(pos.getIndex())) { + case START_FE: + depth++; + break; + case END_FE: + depth--; + if (depth == 0) { + return pattern.substring(text, pos.getIndex()); + } + break; + case QUOTE: + getQuotedString(pattern, pos); + break; + default: + break; + } + } + throw new IllegalArgumentException( + "Unterminated format element at position " + start); + } + + /** + * Insert formats back into the pattern for toPattern() support. + * + * @param pattern source + * @param customPatterns The custom patterns to re-insert, if any + * @return full pattern + */ + private String insertFormats(final String pattern, final ArrayList<String> customPatterns) { + if (!containsElements(customPatterns)) { + return pattern; + } + final StringBuilder sb = new StringBuilder(pattern.length() * 2); + final ParsePosition pos = new ParsePosition(0); + int fe = -1; + int depth = 0; + while (pos.getIndex() < pattern.length()) { + final char c = pattern.charAt(pos.getIndex()); + switch (c) { + case QUOTE: + appendQuotedString(pattern, pos, sb); + break; + case START_FE: + depth++; + sb.append(START_FE).append(readArgumentIndex(pattern, next(pos))); + // do not look for custom patterns when they are embedded, e.g. in a choice + if (depth == 1) { + fe++; + final String customPattern = customPatterns.get(fe); + if (customPattern != null) { + sb.append(START_FMT).append(customPattern); + } + } + break; + case END_FE: + depth--; + //$FALL-THROUGH$ + default: + sb.append(c); + next(pos); + } + } + return sb.toString(); + } + + /** + * Consume whitespace from the current parse position. 
+ * + * @param pattern String to read + * @param pos current position + */ + private void seekNonWs(final String pattern, final ParsePosition pos) { + int len = 0; + final char[] buffer = pattern.toCharArray(); + do { + len = StrMatcher.splitMatcher().isMatch(buffer, pos.getIndex()); + pos.setIndex(pos.getIndex() + len); + } while (len > 0 && pos.getIndex() < pattern.length()); + } + + /** + * Convenience method to advance parse position by 1 + * + * @param pos ParsePosition + * @return <code>pos</code> + */ + private ParsePosition next(final ParsePosition pos) { + pos.setIndex(pos.getIndex() + 1); + return pos; + } + + /** + * Consume a quoted string, adding it to <code>appendTo</code> if + * specified. + * + * @param pattern pattern to parse + * @param pos current parse position + * @param appendTo optional StringBuilder to append + * @return <code>appendTo</code> + */ + private StringBuilder appendQuotedString(final String pattern, final ParsePosition pos, + final StringBuilder appendTo) { + assert pattern.toCharArray()[pos.getIndex()] == QUOTE : + "Quoted string must start with quote character"; + + // handle quote character at the beginning of the string + if(appendTo != null) { + appendTo.append(QUOTE); + } + next(pos); + + final int start = pos.getIndex(); + final char[] c = pattern.toCharArray(); + final int lastHold = start; + for (int i = pos.getIndex(); i < pattern.length(); i++) { + switch (c[pos.getIndex()]) { + case QUOTE: + next(pos); + return appendTo == null ? 
null : appendTo.append(c, lastHold, + pos.getIndex() - lastHold); + default: + next(pos); + } + } + throw new IllegalArgumentException( + "Unterminated quoted string at position " + start); + } + + /** + * Consume quoted string only + * + * @param pattern pattern to parse + * @param pos current parse position + */ + private void getQuotedString(final String pattern, final ParsePosition pos) { + appendQuotedString(pattern, pos, null); + } + + /** + * Learn whether the specified Collection contains non-null elements. + * @param coll to check + * @return <code>true</code> if some Object was found, <code>false</code> otherwise. + */ + private boolean containsElements(final Collection<?> coll) { + if (coll == null || coll.isEmpty()) { + return false; + } + for (final Object name : coll) { + if (name != null) { + return true; + } + } + return false; + } +} http://git-wip-us.apache.org/repos/asf/commons-text/blob/4d927351/src/main/java/org/apache/commons/text/beta/FormatFactory.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/text/beta/FormatFactory.java b/src/main/java/org/apache/commons/text/beta/FormatFactory.java new file mode 100644 index 0000000..0daa581 --- /dev/null +++ b/src/main/java/org/apache/commons/text/beta/FormatFactory.java @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.text.beta; + +import java.text.Format; +import java.util.Locale; + +/** + * Format factory. + * + * @since 1.0 + */ +public interface FormatFactory { + + /** + * Create or retrieve a format instance. + * + * @param name The format type name + * @param arguments Arguments used to create the format instance. This allows the + * <code>FormatFactory</code> to implement the "format style" + * concept from <code>java.text.MessageFormat</code>. + * @param locale The locale, may be null + * @return The format instance + */ + Format getFormat(String name, String arguments, Locale locale); + +} http://git-wip-us.apache.org/repos/asf/commons-text/blob/4d927351/src/main/java/org/apache/commons/text/beta/FormattableUtils.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/text/beta/FormattableUtils.java b/src/main/java/org/apache/commons/text/beta/FormattableUtils.java new file mode 100644 index 0000000..206a055 --- /dev/null +++ b/src/main/java/org/apache/commons/text/beta/FormattableUtils.java @@ -0,0 +1,153 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.text.beta;

import java.util.Formattable;
import java.util.Formatter;

import static java.util.FormattableFlags.LEFT_JUSTIFY;

/**
 * <p>Provides utilities for working with the {@code Formattable} interface.</p>
 *
 * <p>The {@link Formattable} interface provides basic control over formatting
 * when using a {@code Formatter}. It is primarily concerned with numeric precision
 * and padding, and is not designed to allow generalised alternate formats.</p>
 *
 * @since 1.0
 *
 */
public class FormattableUtils {

    /**
     * A format that simply outputs the value as a string.
     */
    private static final String SIMPLEST_FORMAT = "%s";

    /**
     * <p>{@code FormattableUtils} instances should NOT be constructed in
     * standard programming. Instead, the methods of the class should be invoked
     * statically.</p>
     *
     * <p>This constructor is public to permit tools that require a JavaBean
     * instance to operate.</p>
     */
    public FormattableUtils() {
        super();
    }

    //-----------------------------------------------------------------------
    /**
     * Get the default formatted representation of the specified
     * {@code Formattable}.
     *
     * @param formattable  the instance to convert to a string, not null
     * @return the resulting string, not null
     */
    public static String toString(final Formattable formattable) {
        return String.format(SIMPLEST_FORMAT, formattable);
    }

    /**
     * Handles the common {@code Formattable} operations of truncate-pad-append,
     * with no ellipsis on precision overflow, and padding width underflow with
     * spaces.
     *
     * @param seq  the string to handle, not null
     * @param formatter  the destination formatter, not null
     * @param flags  the flags for formatting, see {@code Formattable}
     * @param width  the width of the output, see {@code Formattable}
     * @param precision  the precision of the output, see {@code Formattable}
     * @return the {@code formatter} instance, not null
     */
    public static Formatter append(final CharSequence seq, final Formatter formatter, final int flags, final int width,
            final int precision) {
        return append(seq, formatter, flags, width, precision, ' ', null);
    }

    /**
     * Handles the common {@link Formattable} operations of truncate-pad-append,
     * with no ellipsis on precision overflow.
     *
     * @param seq  the string to handle, not null
     * @param formatter  the destination formatter, not null
     * @param flags  the flags for formatting, see {@code Formattable}
     * @param width  the width of the output, see {@code Formattable}
     * @param precision  the precision of the output, see {@code Formattable}
     * @param padChar  the pad character to use
     * @return the {@code formatter} instance, not null
     */
    public static Formatter append(final CharSequence seq, final Formatter formatter, final int flags, final int width,
            final int precision, final char padChar) {
        return append(seq, formatter, flags, width, precision, padChar, null);
    }

    /**
     * Handles the common {@link Formattable} operations of truncate-pad-append,
     * padding width underflow with spaces.
     *
     * @param seq  the string to handle, not null
     * @param formatter  the destination formatter, not null
     * @param flags  the flags for formatting, see {@code Formattable}
     * @param width  the width of the output, see {@code Formattable}
     * @param precision  the precision of the output, see {@code Formattable}
     * @param ellipsis  the ellipsis to use when precision dictates truncation, null or
     *  empty causes a hard truncation
     * @return the {@code formatter} instance, not null
     */
    public static Formatter append(final CharSequence seq, final Formatter formatter, final int flags, final int width,
            final int precision, final CharSequence ellipsis) {
        return append(seq, formatter, flags, width, precision, ' ', ellipsis);
    }

    /**
     * Handles the common {@link Formattable} operations of truncate-pad-append.
     *
     * <p>The resulting text is written to the formatter literally: a {@code '%'} in
     * {@code seq}, {@code padChar} or {@code ellipsis} is output as-is and never
     * interpreted as a format specifier.</p>
     *
     * @param seq  the string to handle, not null
     * @param formatter  the destination formatter, not null
     * @param flags  the flags for formatting, see {@code Formattable}
     * @param width  the width of the output, see {@code Formattable}
     * @param precision  the precision of the output, see {@code Formattable}
     * @param padChar  the pad character to use
     * @param ellipsis  the ellipsis to use when precision dictates truncation, null or
     *  empty causes a hard truncation
     * @return the {@code formatter} instance, not null
     * @throws IllegalArgumentException if {@code ellipsis} is longer than a non-negative {@code precision}
     */
    public static Formatter append(final CharSequence seq, final Formatter formatter, final int flags, final int width,
            final int precision, final char padChar, final CharSequence ellipsis) {
        if (ellipsis != null && precision >= 0 && ellipsis.length() > precision) {
            throw new IllegalArgumentException(String.format("Specified ellipsis '%1$s' exceeds precision of %2$s",
                    ellipsis, Integer.valueOf(precision)));
        }
        final StringBuilder buf = new StringBuilder(seq);
        if (precision >= 0 && precision < seq.length()) {
            // truncate so that the text plus the ellipsis is exactly 'precision' characters long
            final String suffix = ellipsis == null ? "" : ellipsis.toString();
            buf.replace(precision - suffix.length(), seq.length(), suffix);
        }
        final int padCount = width - buf.length();
        if (padCount > 0) {
            final StringBuilder padding = new StringBuilder(padCount);
            for (int i = 0; i < padCount; i++) {
                padding.append(padChar);
            }
            if ((flags & LEFT_JUSTIFY) == LEFT_JUSTIFY) {
                buf.append(padding);
            } else {
                buf.insert(0, padding);
            }
        }
        // pass the text as an argument, not as the format string, so '%' is not parsed as a conversion
        formatter.format(SIMPLEST_FORMAT, buf.toString());
        return formatter;
    }

}