Author: oheger
Date: Tue Oct  2 19:52:07 2018
New Revision: 1842649

URL: http://svn.apache.org/viewvc?rev=1842649&view=rev
Log:
[CONFIGURATION-715/716] Added alternative IOFactory implementation.

The new JupIOFactory class provides readers and writers for properties
files that mimic the behavior of java.util.Properties more closely.

Thanks to Patrick Schmidt for the patch.

Added:
    commons/proper/configuration/trunk/src/test/resources/jup-test.properties
Modified:
    
commons/proper/configuration/trunk/src/main/java/org/apache/commons/configuration2/PropertiesConfiguration.java
    
commons/proper/configuration/trunk/src/test/java/org/apache/commons/configuration2/TestPropertiesConfiguration.java

Modified: 
commons/proper/configuration/trunk/src/main/java/org/apache/commons/configuration2/PropertiesConfiguration.java
URL: 
http://svn.apache.org/viewvc/commons/proper/configuration/trunk/src/main/java/org/apache/commons/configuration2/PropertiesConfiguration.java?rev=1842649&r1=1842648&r2=1842649&view=diff
==============================================================================
--- 
commons/proper/configuration/trunk/src/main/java/org/apache/commons/configuration2/PropertiesConfiguration.java
 (original)
+++ 
commons/proper/configuration/trunk/src/main/java/org/apache/commons/configuration2/PropertiesConfiguration.java
 Tue Oct  2 19:52:07 2018
@@ -816,7 +816,7 @@ public class PropertiesConfiguration ext
          */
         protected void parseProperty(final String line)
         {
-            final String[] property = doParseProperty(line);
+            final String[] property = doParseProperty(line, true);
             initPropertyName(property[0]);
             initPropertyValue(property[1]);
             initPropertySeparator(property[2]);
@@ -833,7 +833,19 @@ public class PropertiesConfiguration ext
          */
         protected void initPropertyName(final String name)
         {
-            propertyName = StringEscapeUtils.unescapeJava(name);
+            propertyName = unescapePropertyName(name);
+        }
+
+        /**
+         * Performs unescaping on the given property name.
+         *
+         * @param name the property name
+         * @return the unescaped property name
+         * @since 2.4
+         */
+        protected String unescapePropertyName(String name)
+        {
+            return StringEscapeUtils.unescapeJava(name);
         }
 
         /**
@@ -847,7 +859,19 @@ public class PropertiesConfiguration ext
          */
         protected void initPropertyValue(final String value)
         {
-            propertyValue = unescapeJava(value);
+            propertyValue = unescapePropertyValue(value);
+        }
+
+        /**
+         * Performs unescaping on the given property value.
+         *
+         * @param value the property value
+         * @return the unescaped property value
+         * @since 2.4
+         */
+        protected String unescapePropertyValue(String value)
+        {
+            return unescapeJava(value);
         }
 
         /**
@@ -871,18 +895,20 @@ public class PropertiesConfiguration ext
          * @param line the line
          * @return a flag if the lines should be combined
          */
-        private static boolean checkCombineLines(final String line)
+        static boolean checkCombineLines(final String line)
         {
             return countTrailingBS(line) % 2 != 0;
         }
 
         /**
-         * Parse a property line and return the key, the value, and the 
separator in an array.
+         * Parse a property line and return the key, the value, and the 
separator in an
+         * array.
          *
          * @param line the line to parse
+         * @param trimValue flag whether the value is to be trimmed
          * @return an array with the property's key, value, and separator
          */
-        private static String[] doParseProperty(final String line)
+        static String[] doParseProperty(final String line, final boolean 
trimValue)
         {
             final Matcher matcher = PROPERTY_PATTERN.matcher(line);
 
@@ -891,7 +917,14 @@ public class PropertiesConfiguration ext
             if (matcher.matches())
             {
                 result[0] = matcher.group(IDX_KEY).trim();
-                result[1] = matcher.group(IDX_VALUE).trim();
+
+                String value = matcher.group(IDX_VALUE);
+                if (trimValue)
+                {
+                    value = value.trim();
+                }
+                result[1] = value;
+
                 result[2] = matcher.group(IDX_SEPARATOR);
             }
 
@@ -934,7 +967,7 @@ public class PropertiesConfiguration ext
          * values. This implementation applies the transformation defined by 
the
          * {@link #ESCAPE_PROPERTIES} translator.
          */
-        private static final ValueTransformer TRANSFORMER =
+        private static final ValueTransformer DEFAULT_TRANSFORMER =
                 new ValueTransformer()
                 {
                     @Override
@@ -945,6 +978,9 @@ public class PropertiesConfiguration ext
                     }
                 };
 
+        /** The value transformer used for escaping property values. */
+        private final ValueTransformer valueTransformer;
+
         /** The list delimiter handler.*/
         private final ListDelimiterHandler delimiterHandler;
 
@@ -966,8 +1002,22 @@ public class PropertiesConfiguration ext
          */
         public PropertiesWriter(final Writer writer, final 
ListDelimiterHandler delHandler)
         {
+            this(writer, delHandler, DEFAULT_TRANSFORMER);
+        }
+
+        /**
+         * Creates a new instance of {@code PropertiesWriter}.
+         *
+         * @param writer a Writer object providing the underlying stream
+         * @param delHandler the delimiter handler for dealing with properties
+         *        with multiple values
+         * @param valueTransformer the value transformer used to escape 
property values
+         */
+        public PropertiesWriter(Writer writer, ListDelimiterHandler 
delHandler, ValueTransformer valueTransformer)
+        {
             super(writer);
             delimiterHandler = delHandler;
+            this.valueTransformer = valueTransformer;
         }
 
         /**
@@ -1111,7 +1161,7 @@ public class PropertiesConfiguration ext
                     try
                     {
                         v = String.valueOf(getDelimiterHandler()
-                                        .escapeList(values, TRANSFORMER));
+                                        .escapeList(values, valueTransformer));
                     }
                     catch (final UnsupportedOperationException uoex)
                     {
@@ -1127,7 +1177,7 @@ public class PropertiesConfiguration ext
             }
             else
             {
-                v = String.valueOf(getDelimiterHandler().escape(value, 
TRANSFORMER));
+                v = String.valueOf(getDelimiterHandler().escape(value, 
valueTransformer));
             }
 
             write(escapeKey(key));
@@ -1166,7 +1216,8 @@ public class PropertiesConfiguration ext
             {
                 final char c = key.charAt(i);
 
-                if (ArrayUtils.contains(SEPARATORS, c) || 
ArrayUtils.contains(WHITE_SPACE, c))
+                if (ArrayUtils.contains(SEPARATORS, c) || 
ArrayUtils.contains(WHITE_SPACE, c) ||
+                        c == '\\')
                 {
                     // escape the separator
                     newkey.append('\\');
@@ -1301,6 +1352,233 @@ public class PropertiesConfiguration ext
     }
 
     /**
+     * An alternative {@link IOFactory} that tries to mimic the behavior of
+     * {@link java.util.Properties} (Jup) more closely.
+     * <p>
+     * It also has the option to <em>not</em> use Unicode escapes. When using 
UTF-8
+     * encoding (which is e.g. the new default for resource bundle properties 
files
+     * since Java 9), Unicode escapes are no longer required and avoiding them 
makes
+     * properties files more readable with regular text editors.
+     * <ul>
+     * <li>Trailing whitespace will not be trimmed from each line.</li>
+     * <li>Unknown escape sequences will have their backslash removed.</li>
+     * <li>{@code \b} is not a recognized escape sequence.</li>
+     * <li>Leading spaces in property values are preserved by escaping 
them.</li>
+     * <li>Lines continuing a property are never treated as comment lines.</li>
+     * </ul>
+     *
+     * @since 2.4
+     */
+    public static class JupIOFactory implements IOFactory
+    {
+
+        /**
+         * Whether characters less than {@code \u0020} and characters greater 
than
+         * {@code \u007E} in property keys or values should be escaped using
+         * Unicode escape sequences. Not necessary when e.g. writing as UTF-8.
+         */
+        private final boolean escapeUnicode;
+
+        /**
+         * Constructs a new {@link JupIOFactory} with Unicode escaping.
+         */
+        public JupIOFactory()
+        {
+            this(true);
+        }
+
+        /**
+         * Constructs a new {@link JupIOFactory} with optional Unicode 
escaping. Whether
+         * Unicode escaping is required depends on the encoding used to save 
the
+         * properties file. E.g. for ISO-8859-1 this must be turned on, for 
UTF-8 it's
+         * not necessary. Unfortunately this factory can't determine the 
encoding on its
+         * own.
+         *
+         * @param escapeUnicode whether Unicode characters should be escaped
+         */
+        public JupIOFactory(boolean escapeUnicode)
+        {
+            this.escapeUnicode = escapeUnicode;
+        }
+
+        @Override
+        public PropertiesReader createPropertiesReader(Reader in)
+        {
+            return new JupPropertiesReader(in);
+        }
+
+        @Override
+        public PropertiesWriter createPropertiesWriter(Writer out, 
ListDelimiterHandler handler)
+        {
+            return new JupPropertiesWriter(out, handler, escapeUnicode);
+        }
+
+    }
+
+    /**
+     * A {@link PropertiesReader} that tries to mimic the behavior of
+     * {@link java.util.Properties}.
+     *
+     * @since 2.4
+     */
+    public static class JupPropertiesReader extends PropertiesReader
+    {
+
+        /**
+         * Constructor.
+         *
+         * @param reader A Reader.
+         */
+        public JupPropertiesReader(Reader reader)
+        {
+            super(reader);
+        }
+
+
+        @Override
+        public String readProperty() throws IOException
+        {
+            getCommentLines().clear();
+            StringBuilder buffer = new StringBuilder();
+
+            while (true)
+            {
+                String line = readLine();
+                if (line == null)
+                {
+                    // EOF
+                    if (buffer.length() > 0)
+                    {
+                        break;
+                    }
+                    return null;
+                }
+
+                // while a property line continues there are no comments (even 
if the line from
+                // the file looks like one)
+                if (isCommentLine(line) && (buffer.length() == 0))
+                {
+                    getCommentLines().add(line);
+                    continue;
+                }
+
+                // while property line continues left trim all following lines 
read from the
+                // file
+                if (buffer.length() > 0)
+                {
+                    // index of the first non-whitespace character
+                    int i;
+                    for (i = 0; i < line.length(); i++)
+                    {
+                        if (!Character.isWhitespace(line.charAt(i)))
+                        {
+                            break;
+                        }
+                    }
+
+                    line = line.substring(i);
+                }
+
+                if (checkCombineLines(line))
+                {
+                    line = line.substring(0, line.length() - 1);
+                    buffer.append(line);
+                }
+                else
+                {
+                    buffer.append(line);
+                    break;
+                }
+            }
+            return buffer.toString();
+        }
+
+        @Override
+        protected void parseProperty(String line)
+        {
+            String[] property = doParseProperty(line, false);
+            initPropertyName(property[0]);
+            initPropertyValue(property[1]);
+            initPropertySeparator(property[2]);
+        }
+
+        @Override
+        protected String unescapePropertyValue(String value)
+        {
+            return unescapeJava(value, true);
+        }
+
+    }
+
+    /**
+     * A {@link PropertiesWriter} that tries to mimic the behavior of
+     * {@link java.util.Properties}.
+     *
+     * @since 2.4
+     */
+    public static class JupPropertiesWriter extends PropertiesWriter
+    {
+
+        /**
+         * Characters that need to be escaped when writing a properties file.
+         */
+        private static final Map<CharSequence, CharSequence> JUP_CHARS_ESCAPE;
+        static
+        {
+            Map<CharSequence, CharSequence> initialMap = new HashMap<>();
+            initialMap.put("\\", "\\\\");
+            initialMap.put("\n", "\\n");
+            initialMap.put("\t", "\\t");
+            initialMap.put("\f", "\\f");
+            initialMap.put("\r", "\\r");
+            JUP_CHARS_ESCAPE = Collections.unmodifiableMap(initialMap);
+        };
+
+        /**
+         * Creates a new instance of {@code JupPropertiesWriter}.
+         *
+         * @param writer a Writer object providing the underlying stream
+         * @param delHandler the delimiter handler for dealing with properties 
with
+         *        multiple values
+         * @param escapeUnicode whether Unicode characters should be escaped 
using
+         *        Unicode escapes
+         */
+        public JupPropertiesWriter(Writer writer, ListDelimiterHandler 
delHandler, final boolean escapeUnicode)
+        {
+            super(writer, delHandler, new ValueTransformer()
+            {
+                @Override
+                public Object transformValue(Object value)
+                {
+                    String valueString = String.valueOf(value);
+
+                    CharSequenceTranslator translator;
+                    if (escapeUnicode)
+                    {
+                        translator = new AggregateTranslator(new 
LookupTranslator(JUP_CHARS_ESCAPE),
+                                UnicodeEscaper.outsideOf(0x20, 0x7e));
+                    }
+                    else
+                    {
+                        translator = new AggregateTranslator(new 
LookupTranslator(JUP_CHARS_ESCAPE));
+                    }
+
+                    valueString = translator.translate(valueString);
+
+                    // escape the first leading space to preserve it (and all 
after it)
+                    if (valueString.startsWith(" "))
+                    {
+                        valueString = "\\" + valueString;
+                    }
+
+                    return valueString;
+                }
+            });
+        }
+
+    }
+
+    /**
      * <p>Unescapes any Java literals found in the {@code String} to a
      * {@code Writer}.</p> This is a slightly modified version of the
      * StringEscapeUtils.unescapeJava() function in commons-lang that doesn't
@@ -1312,6 +1590,25 @@ public class PropertiesConfiguration ext
      */
     protected static String unescapeJava(final String str)
     {
+        return unescapeJava(str, false);
+    }
+
+    /**
+     * Unescapes Java literals found in the {@code String} to a {@code Writer}.
+     * <p>
+     * When the parameter {@code jupCompatible} is {@code false}, the classic
+     * behavior is used (see {@link #unescapeJava(String)}). When it's {@code 
true}
+     * a slightly different behavior that's compatible with
+     * {@link java.util.Properties} is used (see {@link JupIOFactory}).
+     *
+     * @param str the {@code String} to unescape, may be null
+     * @param jupCompatible whether unescaping is compatible with
+     *        {@link java.util.Properties}; otherwise the classic behavior is 
used
+     * @return the processed string
+     * @throws IllegalArgumentException if the Writer is {@code null}
+     */
+    protected static String unescapeJava(String str, boolean jupCompatible)
+    {
         if (str == null)
         {
             return null;
@@ -1370,7 +1667,8 @@ public class PropertiesConfiguration ext
                 {
                     out.append('\n');
                 }
-                else if (ch == 'b')
+                // JUP does not recognize \b
+                else if (!jupCompatible && ch == 'b')
                 {
                     out.append('\b');
                 }
@@ -1385,7 +1683,11 @@ public class PropertiesConfiguration ext
                 }
                 else
                 {
-                    out.append('\\');
+                    // JUP simply throws away the \ of unknown escape sequences
+                    if (!jupCompatible)
+                    {
+                        out.append('\\');
+                    }
                     out.append(ch);
                 }
 

Modified: 
commons/proper/configuration/trunk/src/test/java/org/apache/commons/configuration2/TestPropertiesConfiguration.java
URL: 
http://svn.apache.org/viewvc/commons/proper/configuration/trunk/src/test/java/org/apache/commons/configuration2/TestPropertiesConfiguration.java?rev=1842649&r1=1842648&r2=1842649&view=diff
==============================================================================
--- 
commons/proper/configuration/trunk/src/test/java/org/apache/commons/configuration2/TestPropertiesConfiguration.java
 (original)
+++ 
commons/proper/configuration/trunk/src/test/java/org/apache/commons/configuration2/TestPropertiesConfiguration.java
 Tue Oct  2 19:52:07 2018
@@ -22,13 +22,18 @@ import java.net.HttpURLConnection;
 import java.net.URL;
 import java.net.URLConnection;
 import java.net.URLStreamHandler;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Paths;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.HashSet;
 import java.util.Iterator;
 import java.util.List;
+import java.util.Properties;
 import java.util.Set;
 
+import org.apache.commons.collections.IteratorUtils;
 import org.apache.commons.configuration2.SynchronizerTestImpl.Methods;
 import 
org.apache.commons.configuration2.builder.FileBasedBuilderParametersImpl;
 import org.apache.commons.configuration2.builder.FileBasedConfigurationBuilder;
@@ -1211,6 +1216,147 @@ public class TestPropertiesConfiguration
         }
     }
 
+    /**
+     * Tests that {@link PropertiesConfiguration.JupIOFactory} reads the same 
keys
+     * and values as {@link Properties} based on a test file.
+     */
+    @Test
+    public void testJupRead() throws IOException, ConfigurationException
+    {
+        conf.clear();
+        conf.setIOFactory(new PropertiesConfiguration.JupIOFactory());
+
+        String testFilePath = 
ConfigurationAssert.getTestFile("jup-test.properties").getAbsolutePath();
+
+        load(conf, testFilePath);
+
+        Properties jup = new Properties();
+        try (InputStream in = Files.newInputStream(Paths.get(testFilePath)))
+        {
+            jup.load(in);
+        }
+
+        @SuppressWarnings("unchecked")
+        Set<Object> pcKeys = new 
HashSet<>(IteratorUtils.toList(conf.getKeys()));
+        assertEquals(jup.keySet(), pcKeys);
+
+        for (Object key : jup.keySet())
+        {
+            String keyString = key.toString();
+            System.out.println(keyString);
+            assertEquals("Wrong property value for '" + keyString + "'", 
jup.getProperty(keyString),
+                    conf.getProperty(keyString));
+        }
+    }
+
+    /**
+     * Tests that {@link PropertiesConfiguration.JupIOFactory} writes 
properties in
+     * a way that allows {@link Properties} to read them exactly like they 
were set.
+     */
+    @Test
+    public void testJupWrite() throws IOException, ConfigurationException
+    {
+        conf.clear();
+        conf.setIOFactory(new PropertiesConfiguration.JupIOFactory());
+
+        String testFilePath = 
ConfigurationAssert.getTestFile("jup-test.properties").getAbsolutePath();
+
+        // read the test properties and set them on the PropertiesConfiguration
+        Properties origProps = new Properties();
+        try (InputStream in = Files.newInputStream(Paths.get(testFilePath)))
+        {
+            origProps.load(in);
+        }
+        for (Object key : origProps.keySet())
+        {
+            String keyString = key.toString();
+            conf.setProperty(keyString, origProps.getProperty(keyString));
+        }
+
+        // save the configuration
+        saveTestConfig();
+        assertTrue("The saved file doesn't exist", 
testSavePropertiesFile.exists());
+
+        // load the saved file...
+        Properties testProps = new Properties();
+        try (InputStream in = 
Files.newInputStream(testSavePropertiesFile.toPath()))
+        {
+            testProps.load(in);
+        }
+
+        // ... and compare the properties to the originals
+        @SuppressWarnings("unchecked")
+        Set<Object> pcKeys = new 
HashSet<>(IteratorUtils.toList(conf.getKeys()));
+        assertEquals(testProps.keySet(), pcKeys);
+
+        for (Object key : testProps.keySet())
+        {
+            String keyString = key.toString();
+            assertEquals("Wrong property value for '" + keyString + "'", 
testProps.getProperty(keyString),
+                    conf.getProperty(keyString));
+        }
+    }
+
+    /**
+     * Tests that {@link PropertiesConfiguration.JupIOFactory} writes 
properties in
+     * a way that allows {@link Properties} to read them exactly like they 
were set.
+     * This test writes in UTF-8 encoding, with Unicode escapes turned off.
+     */
+    @Test
+    public void testJupWriteUtf8WithoutUnicodeEscapes() throws IOException, 
ConfigurationException
+    {
+        conf.clear();
+        conf.setIOFactory(new PropertiesConfiguration.JupIOFactory(false));
+
+        String testFilePath = 
ConfigurationAssert.getTestFile("jup-test.properties").getAbsolutePath();
+
+        // read the test properties and set them on the PropertiesConfiguration
+        Properties origProps = new Properties();
+        try (InputStream in = Files.newInputStream(Paths.get(testFilePath)))
+        {
+            origProps.load(in);
+        }
+        for (Object key : origProps.keySet())
+        {
+            String keyString = key.toString();
+            conf.setProperty(keyString, origProps.getProperty(keyString));
+        }
+
+        // save the configuration as UTF-8
+        final FileHandler handler = new FileHandler(conf);
+        handler.setEncoding(StandardCharsets.UTF_8.name());
+        handler.save(testSavePropertiesFile);
+        assertTrue("The saved file doesn't exist", 
testSavePropertiesFile.exists());
+
+        // load the saved file...
+        Properties testProps = new Properties();
+        try (BufferedReader in = 
Files.newBufferedReader(testSavePropertiesFile.toPath(), 
StandardCharsets.UTF_8))
+        {
+            testProps.load(in);
+        }
+
+        // ... and compare the properties to the originals
+        @SuppressWarnings("unchecked")
+        Set<Object> pcKeys = new 
HashSet<>(IteratorUtils.toList(conf.getKeys()));
+        assertEquals(testProps.keySet(), pcKeys);
+
+        for (Object key : testProps.keySet())
+        {
+            String keyString = key.toString();
+            assertEquals("Wrong property value for '" + keyString + "'", 
testProps.getProperty(keyString),
+                    conf.getProperty(keyString));
+        }
+
+        // ensure that the written properties file contains no Unicode escapes
+        for (String line : Files.readAllLines(testSavePropertiesFile.toPath()))
+        {
+            if (line.contains("\\u"))
+            {
+                fail("Unicode escape found in line: " + line);
+            }
+        }
+    }
+
     /**
      * Helper method for testing the content of a list with elements that
      * contain backslashes.

Added: commons/proper/configuration/trunk/src/test/resources/jup-test.properties
URL: 
http://svn.apache.org/viewvc/commons/proper/configuration/trunk/src/test/resources/jup-test.properties?rev=1842649&view=auto
==============================================================================
--- commons/proper/configuration/trunk/src/test/resources/jup-test.properties 
(added)
+++ commons/proper/configuration/trunk/src/test/resources/jup-test.properties 
Tue Oct  2 19:52:07 2018
@@ -0,0 +1,46 @@
+# A properties file to test the compatibility of
+# PropertiesConfiguration.JupIOFactory to java.util.Properties (JUP).
+# Test cases in here don't necessarily indicate that the DefaultIOFactory
+# violated them.
+
+# Spaces can be escaped using "\". Important for the first leading space in a
+# property value so it won't get trimmed. "\u0020" is too cumbersome.
+leadingSpace = \  abc
+
+# Trailing spaces are preserved.
+trailingSpace = abc     
+
+# For unknown escape sequences, the backslash will simply be removed.
+# Technically "\ " is also an unknown escape sequence; but it's enough to prevent
+# leading whitespace in values from being trimmed.
+unknownEscapeSequences = \a\b\c\ 
+
+# DefaultIOFactory would interpret this as a line continuation (because of the
+# trailing whitespace being trimmed). For JUP this is just an escaped space.
+withoutLineContinuation = abc\ 
+
+# When continuing a property line, # or ! is not interpreted as the start of a
+# comment line.
+withLineContinuationComments = abc\
+# def
+
+# When continuing a property line, leading whitespace of the following lines is
+# trimmed; just like the whitespace between the separator and the first
+# non-whitespace character of the property value.
+withLineContinuationLeadingWhitespace = abc\
+    ghi
+
+# Whitespace other than newlines is a valid separator.
+spaceAsSeparator abc
+tabAsSeparator abc
+
+# Keys can contain spaces when they're escaped.
+key\ With\ Spaces = abc
+
+# Keys can contain tabs when they're escaped. (Who would want that though?
+# Spaces are already pushing it...)
+key\   With\   Tabs = abc
+
+# Just some unicode escapes. Used to test JupIOFactory with disabled Unicode
+# escapes.
+unicodeEscapes = \u6C49\u8BED\u6F22\u8A9E
\ No newline at end of file


Reply via email to