This is an automated email from the ASF dual-hosted git repository.

ggregory pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-codec.git


The following commit(s) were added to refs/heads/master by this push:
     new 7b2ab4a  [CODEC-257] Update from Java 7 to Java 8.
7b2ab4a is described below

commit 7b2ab4a2659b987b823c7cb0a163c766557da802
Author: Gary Gregory <[email protected]>
AuthorDate: Fri Mar 22 13:18:04 2019 -0400

    [CODEC-257] Update from Java 7 to Java 8.
---
 .travis.yml                                        |    1 -
 pom.xml                                            |    4 +-
 src/changes/changes.xml                            |  970 ++++++++---------
 .../codec/language/DaitchMokotoffSoundex.java      | 1106 ++++++++++----------
 .../codec/language/ColognePhoneticTest.java        |  504 ++++-----
 5 files changed, 1292 insertions(+), 1293 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 4dcedf4..9059fff 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -17,7 +17,6 @@ language: java
 sudo: false
 
 jdk:
-  - openjdk7
   - oraclejdk8
   - oraclejdk9
   - oraclejdk11
diff --git a/pom.xml b/pom.xml
index ed89be0..93f308f 100644
--- a/pom.xml
+++ b/pom.xml
@@ -224,8 +224,8 @@ limitations under the License.
     </dependency>
   </dependencies>
   <properties>
-    <maven.compiler.source>1.7</maven.compiler.source>
-    <maven.compiler.target>1.7</maven.compiler.target>
+    <maven.compiler.source>1.8</maven.compiler.source>
+    <maven.compiler.target>1.8</maven.compiler.target>
     <commons.componentid>codec</commons.componentid>
     <commons.module.name>org.apache.commons.codec</commons.module.name>
     <commons.jira.id>CODEC</commons.jira.id>
diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index 2d763d6..18a6cf4 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -1,485 +1,485 @@
-<?xml version="1.0"?>
-<!--
-   Licensed to the Apache Software Foundation (ASF) under one or more
-  contributor license agreements.  See the NOTICE file distributed with
-  this work for additional information regarding copyright ownership.
-  The ASF licenses this file to You under the Apache License, Version 2.0
-  (the "License"); you may not use this file except in compliance with
-  the License.  You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
--->
-<!-- $Id$ -->
-
-<!--
-This file is also used by the maven-changes-plugin to generate the release notes.
-Useful ways of finding items to add to this file are:
-
-1.  Add items when you fix a bug or add a feature (this makes the
-release process easy :-).
-
-2.  Do a JIRA search for tickets closed since the previous release.
-
-3.  Use the report generated by the maven-changelog-plugin to see all
-SVN commits. TBA how to use this with SVN.
-
-To generate the release notes from this file:
-
-mvn changes:announcement-generate -Prelease-notes [-Dchanges.version=m.n]
-
-The <action> type attribute can be add,update,fix,remove.
--->
-
-<document>
-  <properties>
-    <title>Changes</title>
-    <author>Apache Commons Developers</author>
-  </properties>
-  <body>
-
-    <release version="1.13" date="YYYY-MM-DD" description="TBD">
-      <!--Nothing yet -->
-    </release>
-
-    <release version="1.12" date="2019-02-04" description="Feature and fix release.">
-      <!-- The first attribute below should be the issue id; makes it easier to navigate in the IDE outline -->
-      <action issue="CODEC-252" dev="chtompki" type="fix">B64 salt generator: Random -> ThreadLocalRandom</action>
-      <action issue="CODEC-250" dev="sebb" type="fix" due-to="Alex Volodko">Wrong value calculated by Cologne Phonetic if a special character is placed between equal letters</action>
-      <action issue="CODEC-244" dev="ggregory" type="update">Update from Java 6 to Java 7</action>
-      <action issue="CODEC-240" dev="ggregory" type="add" due-to="Ioannis Sermetziadis">Add Percent-Encoding Codec (described in RFC3986 and RFC7578)</action>
-      <action issue="CODEC-246" dev="ggregory" type="fix" due-to="Oscar Luis Vera Pérez">ColognePhoneticTest.testIsEncodeEquals missing assertions</action>
-      <action issue="CODEC-251" dev="ggregory" type="add" due-to="Gary Gregory">Add SHA-3 methods in DigestUtils</action>
-    </release>
-    <release version="1.11" date="2017-10-20" description="Feature and fix 
release.">
-      <!-- The first attribute below should be the issue id; makes it easier 
to navigate in the IDE outline -->
-      <action issue="CODEC-241" type="add">Add support for XXHash32</action>
-      <action issue="CODEC-234" dev="ggregory" type="update" 
due-to="Christopher Schultz, Sebb">Base32.decode should support lowercase 
letters</action>
-      <action issue="CODEC-233" dev="sebb" type="update" due-to="Yossi 
Tamari">Soundex should support more algorithm variants</action>
-      <action issue="CODEC-145" dev="sebb" type="fix" due-to="Jesse 
Glick">Base64.encodeBase64String could better use newStringUsAscii (ditto 
encodeBase64URLSafeString)</action>
-      <action issue="CODEC-144" dev="sebb" type="fix">BaseNCodec: 
encodeToString and encodeAsString methods are identical</action>
-      <action issue="CODEC-232" dev="sebb" type="fix">URLCodec is neither 
immutable nor threadsafe</action>
-      <action issue="CODEC-231" dev="sebb" 
type="fix">StringUtils.equals(CharSequence cs1, CharSequence cs2) can fail with 
String Index OBE</action>
-      <action issue="CODEC-230" dev="sebb" type="fix">URLCodec.WWW_FORM_URL 
should be private</action>
-      <action issue="CODEC-229" dev="sebb" 
type="fix">StringUtils.newStringxxx(null) should return null, not NPE</action>
-      <action issue="CODEC-220" dev="sebb" type="add">Fluent interface for 
DigestUtils</action>
-      <action issue="CODEC-222" dev="sebb" type="add">Fluent interface for 
HmacUtils</action>
-      <action issue="CODEC-225" dev="jochen" type="fix" due-to="Svetlin 
Zarev">Fix minor resource leaks</action>
-      <action issue="CODEC-223" dev="sebb" type="remove">Drop obsolete Ant 
build</action>
-      <action issue="CODEC-171" dev="sebb" type="add" due-to="Brett Okken">Add 
support for CRC32-C</action>
-      <action issue="CODEC-221" dev="sebb" type="update">HmacUtils.updateHmac 
calls reset() unnecessarily</action>
-      <action issue="CODEC-200" dev="sebb" type="fix" due-to="Luciano 
Vernaschi">Base32.HEX_DECODE_TABLE contains the wrong value 32</action>
-      <action issue="CODEC-207" dev="ggregory" type="fix" due-to="Gary 
Gregory">Charsets Javadoc breaks build when using Java 8</action>
-      <action issue="CODEC-199" dev="ggregory/sebb" type="fix" due-to="Yossi 
Tamari">Bug in HW rule in Soundex</action>
-      <action issue="CODEC-209" dev="ggregory" type="fix" due-to="Gary 
Gregory">Javadoc for SHA-224 DigestUtils methods should mention Java 1.8.0 
restriction instead of 1.4.0.</action>
-      <action issue="CODEC-219" dev="ggregory" type="fix" due-to="Gary 
Gregory, Sebb">Don't deprecate Charsets Charset constants in favor of Java 7's 
java.nio.charset.StandardCharsets</action>
-      <action issue="CODEC-217" dev="ggregory" type="add" due-to="Gary 
Gregory">Add HmacAlgorithms.HMAC_SHA_224 (Java 8 only)</action>
-      <action issue="CODEC-213" dev="ggregory" type="add" due-to="Gary 
Gregory">Support JEP 287: SHA-3 Hash Algorithms</action>
-      <action issue="CODEC-212" dev="ggregory" type="add" due-to="Gary 
Gregory">Create a minimal Digest command line utility: 
org.apache.commons.codec.digest.Digest</action>
-      <action issue="CODEC-210" dev="ggregory" type="add" due-to="Gary 
Gregory">Add DigestUtils.getDigest(String, MessageDigest)</action>
-      <action issue="CODEC-208" dev="ggregory" type="add" due-to="Gary 
Gregory">Make some DigestUtils APIs public</action>
-      <action issue="CODEC-206" dev="ggregory" type="add" due-to="Gary 
Gregory">Add java.io.File APIs to MessageDigestAlgorithm</action>
-      <action issue="CODEC-183" dev="ggregory" type="add" due-to="Steven 
Wurster">BaseNCodecOutputStream only supports writing EOF on close()</action>
-      <action issue="CODEC-195" dev="ggregory" type="add" due-to="Gary 
Gregory">Support SHA-224 in DigestUtils on Java 8</action>
-      <action issue="CODEC-194" dev="ggregory" type="add" due-to="Gary 
Gregory">Support java.nio.ByteBuffer in 
org.apache.commons.codec.binary.Hex</action>
-      <action issue="CODEC-193" dev="ggregory" type="add" due-to="Michael 
Donaghy">Support java.nio.ByteBuffer in DigestUtils</action>
-      <action issue="CODEC-202" dev="ggregory" type="add" due-to="Oleg 
Kalnichevski">Add BaseNCodec.encode(byte[], int, int) input with offset and 
length parameters for Base64 and Base32.</action>
-      <action issue="CODEC-203" dev="ggregory" type="add" due-to="Gary 
Gregory">Add convenience method decodeHex(String).</action>
-      <action issue="CODEC-205" dev="ggregory" type="add" due-to="Gary 
Gregory">Add faster CRC32 implementation.</action>
-      <action issue="CODEC-224" dev="ggregory" type="add" due-to="Gary 
Gregory">Add convenience API 
org.apache.commons.codec.binary.Hex.encodeHexString(byte[]|ByteBuffer, 
boolean).</action>
-      <action issue="CODEC-242" dev="ggregory" type="add" due-to="Gary 
Gregory">Add Automatic-Module-Name manifest entry for Java 9.</action>
-    </release>
-    <release version="1.10" date="5 November 2014" description="Feature and 
fix release.">
-      <action dev="ggregory" type="add" issue="CODEC-192" due-to="Thomas 
Neidhart">Add Daitch-Mokotoff Soundex</action>
-      <action dev="ggregory" type="add" issue="CODEC-121" due-to="Thomas 
Neidhart, Java John">QuotedPrintableCodec does not support soft line break per 
the 'quoted-printable' example on Wikipedia</action>
-      <action dev="tn" type="fix" issue="CODEC-185" due-to="Sean Busbey">Added 
clarification to Javadoc of Base64 concerning the use of the urlSafe 
parameter</action>
-      <action dev="tn" type="fix" issue="CODEC-191" due-to="Igor Savin">Added 
clarification to the Javadoc of Base[32|64]OutputStream that it is mandatory to 
call close()</action>
-      <action dev="ggregory" type="fix" issue="CODEC-188" due-to="Hendrik 
Saly">Add support for HMAC Message Authentication Code (MAC) digests</action>
-      <action dev="ggregory" type="fix" issue="CODEC-187" due-to="Michael 
Tobias, Thomas Neidhart">Beider Morse Phonetic Matching producing incorrect 
tokens</action>
-      <action dev="ggregory" type="fix" issue="CODEC-184" due-to="Cyrille 
Artho">NullPointerException in DoubleMetaPhone.isDoubleMetaphoneEqual when 
using empty strings</action>
-      <action dev="ggregory" type="add" issue="CODEC-181" due-to="Ivan 
Martinez-Ortiz">Make possible to provide padding byte to BaseNCodec in 
constructor</action>
-      <action dev="ggregory" type="fix" issue="CODEC-180" due-to="Ville 
Skyttä">Fix Javadoc 1.8.0 errors</action>
-      <action dev="ggregory" type="update" issue="CODEC-178">Deprecate 
Charsets Charset constants in favor of Java 7's 
java.nio.charset.StandardCharsets</action>
-      <action dev="ggregory" type="fix" issue="CODEC-189">Fix Java 8 build 
Javadoc errors</action>
-      <action dev="ggregory" type="update" issue="CODEC-190">Update from 
commons-parent 34 to 35</action>
-    </release>
-    <release version="1.9" date="20 December 2013" description="Feature and 
fix release.">
-      <action dev="ggregory" type="update" issue="CODEC-174" due-to="Thomas 
Champagne">Improve performance of Beider Morse encoder</action>
-      <action dev="ggregory" type="fix" issue="CODEC-175">Beider Morse does 
not close Scanners used to read config files</action>
-      <action dev="sebb" type="fix" issue="CODEC-172" due-to="Matt 
Bishop">Base32 decode table has spurious value</action>
-      <action dev="ggregory" type="fix" issue="CODEC-170" due-to="Ron Wheeler, 
Henri Yandell">Link broken in Metaphone Javadoc</action>
-      <action dev="ggregory" type="fix" issue="CODEC-176" due-to="Ville 
Skyttä">Spelling fixes in Javadoc and comments</action>
-    </release>
-    <release version="1.8" date="19 April 2013" description="Feature and fix 
release. Requires a minimum of Java 1.6.">
-      <action dev="ggregory" type="add" issue="CODEC-168" due-to="Daniel 
Cassidy">Add DigestUtils.updateDigest(MessageDigest, InputStream).</action>
-      <action dev="julius" type="add" issue="CODEC-167">Add JUnit to test our 
decode with pad character in the middle.</action>
-      <action dev="ggregory" type="add" issue="CODEC-161" due-to="crice">Add 
Match Rating Approach (MRA) phonetic algorithm encoder.</action>
-      <action dev="ggregory" type="fix" issue="CODEC-163" 
due-to="leo141">ColognePhonetic encoder unnecessarily creates many char arrays 
on every loop run.</action>
-      <action dev="sebb" type="fix" 
issue="CODEC-160">Base64.encodeBase64URLSafeString doesn't add padding 
characters at the end.</action>
-    </release>
-    <release version="1.7" date="11 September 2012" description="Feature and 
fix release. Requires a minimum of Java 1.6.">
-      <action issue="CODEC-157" dev="ggregory" type="add" due-to="ggregory">
-        DigestUtils: Add MD2 APIs.
-      </action>
-      <action issue="CODEC-156" dev="ggregory" type="add" due-to="ggregory">
-        DigestUtils: add APIs named after standard algorithm name SHA-1.
-      </action>
-      <action issue="CODEC-155" dev="ggregory" type="add" due-to="ggregory">
-        DigestUtils.getDigest(String) should throw IllegalArgumentException 
instead of RuntimeException.
-      </action>
-      <action issue="CODEC-153" dev="ggregory" type="add" due-to="ggregory">
-        Create a class MessageDigestAlgorithms to define standard algorithm 
names.
-      </action>
-      <action issue="CODEC-152" dev="ggregory" type="add" due-to="ggregory">
-        DigestUtils.getDigest(String) loses the original exception.
-      </action>
-      <action issue="CODEC-151" dev="ggregory" type="add" due-to="lathspell">
-        Remove unnecessary attempt to fill up the salt variable in UnixCrypt.
-      </action>
-      <action issue="CODEC-150" dev="ggregory" type="add" due-to="lathspell">
-        Remove unnecessary call to Math.abs().
-      </action>
-      <action issue="CODEC-148" dev="ggregory" type="add" due-to="lathspell">
-        More tests and minor things.
-      </action>
-      <action issue="CODEC-146" dev="tn" type="add" due-to="Julius Davies">
-        Added regression tests for PhoneticEngine based on Solr-3.6.0.
-      </action>
-      <action issue="CODEC-147" dev="tn" type="update">
-        BeiderMorseEncoder/PhoneticEngine: make results deterministic by using 
a LinkedHashSet
-        instead of a HashSet.
-      </action>
-      <action issue="CODEC-143" dev="sebb" type="update">
-        StringBuffer could be replaced by StringBuilder for local variables.
-      </action>
-      <action issue="CODEC-139" dev="ggregory" type="add" due-to="dsebastien">
-        DigestUtils: add updateDigest methods and make methods public.
-      </action>
-      <action issue="CODEC-133" dev="ggregory" type="add" due-to="lathspell">
-        Add classes for MD5/SHA1/SHA-512-based Unix crypt(3) hash variants.
-      </action>
-      <action issue="CODEC-96" dev="ggregory" type="fix" due-to="sebb">
-        Base64 encode() method is no longer thread-safe, breaking clients 
using it as a shared BinaryEncoder.
-        Note: the fix breaks binary compatibility, however the changes are to 
a class (BaseNCodec) which is
-        intended for internal use.
-      </action>
-      <action issue="CODEC-138" dev="sebb" type="fix">
-        Complete FilterInputStream interface for BaseNCodecInputStream.
-      </action>
-      <action issue="CODEC-136" dev="ggregory" type="fix">
-        Use Charset objects when possible, create Charsets for required 
character encodings.
-      </action>
-      <action issue="CODEC-132" dev="ggregory" type="fix" due-to="rcmuir">
-        BeiderMorseEncoder OOM issues.
-      </action>
-      <action issue="CODEC-131" dev="tn" type="fix" due-to="smolav">
-        DoubleMetaphone Javadoc contains dead links.
-      </action>
-      <action issue="CODEC-130" dev="ggregory" type="add" due-to="tn">
-        Base64InputStream.skip skips underlying stream, not output.
-      </action>
-      <action issue="CODEC-63" dev="ggregory" type="add" due-to="tn">
-        Implement NYSIIS phonetic encoder.
-      </action>
-    </release>
-    <release version="1.6" date="20 November 2011" description="Feature and 
fix release. Requires a minimum of Java 1.5.">
-      <action dev="ggregory" type="fix" issue="CODEC-129" due-to="ggregory">
-        Use standard Maven directory layout.
-      </action>
-      <action dev="ggregory" type="fix" issue="CODEC-128" 
due-to="[email protected]">
-        Documentation spelling fixes.
-      </action>
-      <action dev="ggregory, sebb" type="fix" issue="CODEC-127">
-        Fix various character encoding issues in comments and test cases.
-      </action>
-      <action dev="ggregory, matthewpocock" type="update" issue="CODEC-125" 
due-to="Matthew Pocock">
-        Implement a Beider-Morse phonetic matching codec.
-      </action>
-      <action dev="ggregory" type="update" issue="CODEC-119">
-        Migrate to Java 5.
-      </action>
-      <action dev="ggregory" type="update" issue="CODEC-120">
-        Migrate to JUnit 4.
-      </action>
-      <action dev="ggregory" type="fix" issue="CODEC-123">
-        ColognePhonetic Javadoc should use HTML entities for special 
characters.
-      </action>
-    </release>
-    <release version="1.5" date="29 March 2011" description="Feature and fix 
release. Requires a minimum of Java 1.4.">
-      <action dev="sebb" type="add" issue="CODEC-88">
-        Added new Base32 encoder.
-      </action>
-      <action dev="sebb" type="fix" issue="CODEC-89">
-        new Base64().encode() appends a CRLF, and chunks results into 76 
character lines.
-      </action>
-      <action dev="sebb" type="fix" issue="CODEC-92">
-        Many test cases use getBytes() which uses the default platform 
encoding so tests may fail on some platforms.
-      </action>
-      <action dev="sebb, julius, ggregory" type="add" issue="CODEC-93" 
due-to="sebb">
-        Add test(s) to check that encodeBase64() does not chunk output.
-      </action>
-      <action dev="sebb" type="fix" issue="CODEC-97" due-to="mjryall">
-        Base64 default constructor behaviour changed to enable chunking in 1.4.
-      </action>
-      <action dev="ggregory" type="fix" issue="CODEC-99" due-to="julius">
-        Base64.encodeBase64String() shouldn't chunk.
-      </action>
-      <action dev="julius" type="fix" issue="CODEC-101" due-to="balusc">
-        Base64InputStream#read(byte[]) incorrectly returns 0 at end of any 
stream which is multiple of 3 bytes long.
-      </action>
-      <action dev="bayard" type="fix" issue="CODEC-103" due-to="gnuf">
-        Typo in DecoderException message thrown from Hex.decodeHex.
-      </action>
-      <action dev="julius, ggregory" type="add" issue="CODEC-105" due-to="zak">
-        ArrayIndexOutOfBoundsException when doing multiple reads() on encoding 
Base64InputStream.
-      </action>
-      <action dev="bayard" type="add" issue="CODEC-106" due-to="it2mmeyerfa">
-        Add the "Kölner Phonetik" encoder (Cologne Phonetic) to codec.lang.
-      </action>
-      <action dev="ggregory" type="add" issue="CODEC-111" due-to="ggregory">
-        org.apache.commons.codec.net.URLCodec.ESCAPE_CHAR isn't final but 
should be.
-      </action>
-      <action dev="sebb" type="add" issue="CODEC-112" due-to="sebb">
-        Base64.encodeBase64(byte[] binaryData, boolean isChunked, boolean 
urlSafe, int maxResultSize) throws IAE for valid maxResultSize if isChunked is 
false.
-      </action>
-      <action dev="ggregory" type="add" issue="CODEC-113" due-to="ggregory">
-        org.apache.commons.codec.language.RefinedSoundex.US_ENGLISH_MAPPING 
should be package protected MALICIOUS_CODE.
-      </action>
-      <action dev="ggregory" type="add" issue="CODEC-114" due-to="ggregory">
-        org.apache.commons.codec.language.Soundex.US_ENGLISH_MAPPING should be 
package protected MALICIOUS_CODE.
-      </action>
-      <action dev="ggregory" type="add" issue="CODEC-115" due-to="sebb">
-        DoubleMetaphone.maxCodeLen should probably be private.
-      </action>
-      <action dev="ggregory" type="remove" issue="CODEC-116" due-to="ggregory">
-        Remove deprecated package private method 
Base64.discardWhitespace(byte[])
-      </action>
-      <action dev="ggregory" type="fix" issue="CODEC-117" due-to="ggregory">
-        Caverphone encodes names starting and ending with "mb" incorrectly.
-      </action>
-      <action dev="ggregory" type="add" issue="CODEC-118" due-to="ggregory">
-        Split Caverphone class into two classes for Caverphone 1.0 and 2.0.
-      </action>
-    </release>
-    <release version="1.4" date="9 August 2009" description="Feature and fix 
release. Requires a minimum of Java 1.4.">
-      <action dev="ggregory" type="fix" issue="CODEC-80" due-to="Julius 
Davies">
-        Regression: Base64.encode(chunk=true) has bug when input length is 
multiple of 76.
-      </action>
-      <action dev="ggregory" type="fix" issue="CODEC-77" due-to="Julius 
Davies">
-        Base64 bug with empty input (new byte[0]).
-      </action>
-      <action dev="ggregory" type="fix" issue="CODEC-75" due-to="Julius 
Davies">
-        Make Base64 URL-safe.
-      </action>
-      <action dev="ggregory" type="add" issue="CODEC-74">
-        Allow for uppercase letters output in Hex.encodeHex().
-      </action>
-      <action dev="bayard" type="fix" issue="CODEC-72" due-to="Sebb">
-        Soundex and RefinedSoundex issues with character arrays.
-      </action>
-      <action dev="bayard" type="fix" issue="CODEC-71" due-to="Sebb">
-        Base64.isArrayByteBase64() method is inefficient for large byte arrays.
-      </action>
-      <action dev="bayard" type="fix" issue="CODEC-70" due-to="Sebb">
-        Thread safety and malicious code safety improvements.
-      </action>
-      <action dev="bayard" type="fix" issue="CODEC-68" due-to="Robert 
Rodewald">
-        isBase64 throws ArrayIndexOutOfBoundsException on some non-BASE64 
bytes.
-      </action>
-      <action dev="bayard" type="fix" issue="CODEC-65" due-to="Benjamin 
Bentmann">
-        Fix case-insensitive string handling.
-      </action>
-      <action dev="ggregory" type="fix" issue="CODEC-61" due-to="Igor 
Slepchin">
-        Base64.encodeBase64() throws NegativeArraySizeException on large files.
-      </action>
-      <action dev="bayard" type="add" issue="CODEC-60">
-        Implement Caverphone.
-      </action>
-      <action dev="ggregory" type="fix" issue="CODEC-58" due-to="Julius 
Davies">
-        Character set used by Base64 not documented.
-      </action>
-      <action dev="bayard" type="fix" issue="CODEC-56" due-to="Sebb">
-        RefinedSoundex creates instance before al fields have been initialized.
-      </action>
-      <action dev="bayard" type="add" issue="CODEC-52" due-to="Niklas 
Gustavsson">
-        Digest on InputStreams.
-      </action>
-      <action dev="bayard" type="fix" issue="CODEC-51">
-        2 Test failures in SoundexTest.
-      </action>
-      <action dev="bayard" type="add" issue="CODEC-40" due-to="Chris Black">
-        Patch to add crypto-compatible BigInteger encoding support to Base64.
-      </action>
-      <action dev="ggregory" type="add" issue="CODEC-39" due-to="Jamie 
Flournoy" due-to-email="[email protected]">
-        DigestUtils: Add methods for SHA-256, SHA-384, and SHA-512.
-      </action>
-      <action dev="tobrien" type="fix" issue="CODEC-10" due-to="Reggie Riser" 
due-to-email="[email protected]">
-        Using US_ENGLISH in Soundex caused an NullPointerException.
-      </action>
-      <action dev="tobrien" type="fix" issue="CODEC-6" due-to="David Tonhofer">
-        Source tarball spews files all over the place.
-      </action>
-      <action dev="ggregory" type="fix" issue="CODEC-22" due-to="Piero 
Ottuzzi">
-        Base64.isArrayByteBase64() throws an ArrayIndexOutOfBoundsException 
for negative octets
-      </action>
-      <action dev="jochen" type="add" issue="CODEC-69" due-to="Julius Davies">
-        Streaming Base64 (Base64InputStream and Base64OutputStream added).
-      </action>
-      <action dev="ggregory" type="fix" issue="CODEC-78" due-to="Julius 
Davies">
-        Base64: Improve Code Coverage.
-      </action>
-      <action dev="ggregory" type="add" issue="CODEC-59" due-to="Julius 
Davies">
-        Add methods to Base64 which work with String instead of byte[].
-      </action>
-      <action dev="ggregory" type="fix" issue="CODEC-81" due-to="Julius 
Davies">
-        Base64's new constructor parameters ignored.
-      </action>
-      <action dev="niallp" type="fix" issue="CODEC-83">
-        Improve Double Metaphone test coverage.
-      </action>
-      <action dev="niallp" type="fix" issue="CODEC-84">
-        Double Metaphone bugs in alternative encoding.
-      </action>
-      <action dev="ggregory" type="fix" issue="CODEC-73" due-to="Benjamin 
Bentmann">
-        Make string2byte conversions indepedent of platform default encoding.
-      </action>
-    </release>
-    <release version="1.3" date="10 July 2004" description="Feature and fix 
release.">
-      <action dev="ggregory, tobrien" type="add" issue="CODEC-21" due-to="Alex 
Karasulu">
-        BinaryCodec: Encodes and decodes binary to and from Strings of 0s and 
1s.
-      </action>
-      <action dev="ggregory" type="add" issue="CODEC-41" due-to="Oleg 
Kalnichevski" due-to-email="[email protected]">
-        QuotedPrintableCodec: Codec for RFC 1521 MIME (Multipurpose Internet
-        Mail Extensions) Part One. Rules #3, #4, and #5 of the 
quoted-printable spec
-        are not implemented yet. See also issue CODEC-46.
-      </action>
-      <action dev="ggregory" type="add" issue="CODEC-41" due-to="Oleg 
Kalnichevski" due-to-email="[email protected]">
-        BCodec: Identical to the Base64 encoding defined by RFC 1521 and 
allows a
-        character set to be specified.
-      </action>
-      <action dev="ggregory" type="add" issue="CODEC-41" due-to="Oleg 
Kalnichevski" due-to-email="[email protected]">
-        QCodec: Similar to the Quoted-Printable content-transfer-encoding 
defined
-        in RFC 1521 and designed to allow text containing mostly ASCII 
characters to
-        be decipherable on an ASCII terminal without decoding.
-      </action>
-      <action dev="ggregory" type="add" issue="CODEC-45" due-to="Matthew 
Inger" due-to-email="[email protected]">
-        Soundex: Implemented the DIFFERENCE algorithm.
-      </action>
-      <action dev="ggregory" type="add" issue="CODEC-45" due-to="Matthew 
Inger" due-to-email="[email protected]">
-        RefinedSoundex: Implemented the DIFFERENCE algorithm.
-      </action>
-      <action dev="ggregory" type="update">
-        This version is relesed under the
-        &lt;a href="http://www.apache.org/licenses/LICENSE-2.0">Apache License 2.0&lt;/a>
-        , please see LICENSE.txt. Previous versions were released under the
-        &lt;a href="http://www.apache.org/licenses/LICENSE-1.1">Apache License 1.1&lt;/a>
-      </action>
-      <action dev="ggregory" type="update">
-        The Board recommendation to remove Javadoc author tags has been
-        implemented. All author tags are now "Apache Software Foundation".
-      </action>
-      <action dev="ggregory" type="fix" issue="CODEC-25" due-to="Oleg 
Kalnichevski" due-to-email="[email protected]">
-        The default URL encoding logic was broken.
-      </action>
-      <action dev="ggregory" type="fix" issue="CODEC-31" due-to="Gary D. 
Gregory">
-        Base64 chunked encoding not compliant with RFC 2045 section 2.1 CRLF.
-      </action>
-      <action dev="ggregory" type="fix" issue="CODEC-5">
-        Hex converts illegal characters to 255.
-      </action>
-      <action dev="tobrien" type="fix" issue="CODEC-17">
-        Metaphone now correctly handles a silent B in a word that ends in MB.
-        "COMB" is encoded as "KM", before this fix "COMB" was encoded as "KMB".
-      </action>
-      <action dev="ggregory" type="fix">
-        Added missing tags in Javadoc comments.
-      </action>
-      <action dev="ggregory" type="fix">
-        General Javadoc improvements.
-      </action>
-    </release>
-    <release version="1.2" date="24 Nov 2003" description="Feature and fix 
release.">
-      <action dev="tobrien" type="add" due-to="Oleg Kalnichevski" 
due-to-email="[email protected]">
-        URLCodec: Implements the www-form-urlencoded encoding scheme.
-      </action>
-      <action dev="tobrien" type="add" due-to="Dave Dribin, David Graham">
-        DigestUtils: Calculates MD5 and SHA digests.
-      </action>
-      <action dev="tobrien" type="fix" issue="CODEC-26" due-to="Brian Ewins">
-        Modified Base64 to remedy non-compliance with RFC
-        2045. Non-Base64 characters were not being discarded during the
-        decode. RFC 2045 explicitly states that all characters outside of the
-        base64 alphabet are to be ignored.
-      </action>
-      <action dev="ggregory" type="fix" issue="CODEC-4">
-        Hex.decode(Object) throws a ClassCastException when a String argument 
is passed in.
-      </action>
-      <action dev="ggregory" type="fix" issue="CODEC-3">
-        Soundex: The HW rule is not applied; hyphens and apostrophes are not 
ignored.
-      </action>
-      <action dev="ggregory" type="fix" issue="CODEC-29">
-        Soundex.setMaxLength causes bugs and is not needed.
-        Calling Soundex.setMaxLength() with a value of 2 or less causes the 
wrong
-        answer to be returned. Since the encoding returned by Soundex is always
-        of length 4 by definition (we do not use the '-' in as a letter-nnn
-        separator) the need for a maxLength attribute is not needed. Deprecate
-        the field and accessor methods.
-      </action>
-      <action dev="Members" type="fix">
-        Fix in Metaphone relating to the handling of the maximum code length.
-      </action>
-    </release>
-    <release version="1.1" date="29 April 2003"
-      description="The first official release. Numerous projects had been 
depending on version 1.0-dev while in the Sandbox.">
-      <action dev="Members" type="add">
-        A newer version of the Base64 class reflecting improvements from
-        both the commons-httpclient and xml-rpc versions of code forked
-        from catalina.
-      </action>
-      <action dev="Members" type="add">
-        Base64 class from commons-httpclient in org.apache.commons.codec.base64
-        has been retained for backwards compatibility but has been deprecated.
-      </action>
-      <action dev="Members" type="add">
-        Soundex class from commons-util in org.apache.commons.codec.
-      </action>
-      <action dev="Members" type="add">
-        Metaphone class from commons-util in org.apache.commons.codec.
-      </action>
-      <action dev="tobrien" type="add">
-        RefinedSoundex class in org.apache.commons.codec.
-      </action>
-      <action dev="Members" type="add">
-        Encoder/Decoder interfaces in org.apache.commons.
-      </action>
-      <action dev="Members" type="add">
-        String and Binary specific Encoder/Decoder interfaces in 
org.apache.commons.
-      </action>
-      <action dev="Members" type="add">
-        StringEncoderComparator replaces the SoundexComparator from the 
language package.
-      </action>
-      <action dev="Members" type="fix">
-        Base64 now discards whitespace characters when decoding encoded 
content.
-      </action>
-    </release>
-    <release version="1.0-dev" date="25 April 2003" description="Last release 
from the Sandbox.">
-      <action dev="tobrien" type="add">
-        Base64 class from commons-httpclient in 
org.apache.commons.codec.base64.
-      </action>
-      <action dev="tobrien" type="add">
-        Soundex class from commons-util in org.apache.commons.codec.
-      </action>
-      <action dev="tobrien" type="add">
-        Metaphone class from commons-util in org.apache.commons.codec.
-      </action>
-      <action dev="Members" type="add">
-        SoundexComparator class from commons-util in org.apache.commons.codec.
-      </action>
-    </release>
-  </body>
-</document>
+<?xml version="1.0"?>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+<!-- $Id$ -->
+
+<!--
+This file is also used by the maven-changes-plugin to generate the release 
notes.
+Useful ways of finding items to add to this file are:
+
+1.  Add items when you fix a bug or add a feature (this makes the
+release process easy :-).
+
+2.  Do a JIRA search for tickets closed since the previous release.
+
+3.  Use the report generated by the maven-changelog-plugin to see all
+SVN commits. TBA how to use this with SVN.
+
+To generate the release notes from this file:
+
+mvn changes:announcement-generate -Prelease-notes [-Dchanges.version=m.n]
+
+The <action> type attribute can be add,update,fix,remove.
+-->
+
+<document>
+  <properties>
+    <title>Changes</title>
+    <author>Apache Commons Developers</author>
+  </properties>
+  <body>
+
+    <release version="1.13" date="YYYY-MM-DD" description="TBD">
+      <action issue="CODEC-257" dev="ggregory" type="update">Update from Java 
7 to Java 8</action>      
+    </release>
+
+    <release version="1.12" date="2019-02-04" description="Feature and fix 
release.">
+      <!-- The first attribute below should be the issue id; makes it easier 
to navigate in the IDE outline -->
+      <action issue="CODEC-252" dev="chtompki" type="fix">B64 salt generator: 
Random -> ThreadLocalRandom</action>
+      <action issue="CODEC-250" dev="sebb" type="fix" due-to="Alex 
Volodko">Wrong value calculated by Cologne Phonetic if a special character is 
placed between equal letters</action>
+      <action issue="CODEC-244" dev="ggregory" type="update">Update from Java 
6 to Java 7</action>
+      <action issue="CODEC-240" dev="ggregory" type="add" due-to="Ioannis 
Sermetziadis">Add Percent-Encoding Codec (described in RFC3986 and 
RFC7578)</action>
+      <action issue="CODEC-246" dev="ggregory" type="fix" due-to="Oscar Luis 
Vera Pérez">ColognePhoneticTest.testIsEncodeEquals missing assertions</action>
+      <action issue="CODEC-251" dev="ggregory" type="add" due-to="Gary 
Gregory">Add SHA-3 methods in DigestUtils</action>
+    </release>
+    <release version="1.11" date="2017-10-20" description="Feature and fix 
release.">
+      <!-- The first attribute below should be the issue id; makes it easier 
to navigate in the IDE outline -->
+      <action issue="CODEC-241" type="add">Add support for XXHash32</action>
+      <action issue="CODEC-234" dev="ggregory" type="update" 
due-to="Christopher Schultz, Sebb">Base32.decode should support lowercase 
letters</action>
+      <action issue="CODEC-233" dev="sebb" type="update" due-to="Yossi 
Tamari">Soundex should support more algorithm variants</action>
+      <action issue="CODEC-145" dev="sebb" type="fix" due-to="Jesse 
Glick">Base64.encodeBase64String could better use newStringUsAscii (ditto 
encodeBase64URLSafeString)</action>
+      <action issue="CODEC-144" dev="sebb" type="fix">BaseNCodec: 
encodeToString and encodeAsString methods are identical</action>
+      <action issue="CODEC-232" dev="sebb" type="fix">URLCodec is neither 
immutable nor threadsafe</action>
+      <action issue="CODEC-231" dev="sebb" 
type="fix">StringUtils.equals(CharSequence cs1, CharSequence cs2) can fail with 
String Index OBE</action>
+      <action issue="CODEC-230" dev="sebb" type="fix">URLCodec.WWW_FORM_URL 
should be private</action>
+      <action issue="CODEC-229" dev="sebb" 
type="fix">StringUtils.newStringxxx(null) should return null, not NPE</action>
+      <action issue="CODEC-220" dev="sebb" type="add">Fluent interface for 
DigestUtils</action>
+      <action issue="CODEC-222" dev="sebb" type="add">Fluent interface for 
HmacUtils</action>
+      <action issue="CODEC-225" dev="jochen" type="fix" due-to="Svetlin 
Zarev">Fix minor resource leaks</action>
+      <action issue="CODEC-223" dev="sebb" type="remove">Drop obsolete Ant 
build</action>
+      <action issue="CODEC-171" dev="sebb" type="add" due-to="Brett Okken">Add 
support for CRC32-C</action>
+      <action issue="CODEC-221" dev="sebb" type="update">HmacUtils.updateHmac 
calls reset() unnecessarily</action>
+      <action issue="CODEC-200" dev="sebb" type="fix" due-to="Luciano 
Vernaschi">Base32.HEX_DECODE_TABLE contains the wrong value 32</action>
+      <action issue="CODEC-207" dev="ggregory" type="fix" due-to="Gary 
Gregory">Charsets Javadoc breaks build when using Java 8</action>
+      <action issue="CODEC-199" dev="ggregory/sebb" type="fix" due-to="Yossi 
Tamari">Bug in HW rule in Soundex</action>
+      <action issue="CODEC-209" dev="ggregory" type="fix" due-to="Gary 
Gregory">Javadoc for SHA-224 DigestUtils methods should mention Java 1.8.0 
restriction instead of 1.4.0.</action>
+      <action issue="CODEC-219" dev="ggregory" type="fix" due-to="Gary 
Gregory, Sebb">Don't deprecate Charsets Charset constants in favor of Java 7's 
java.nio.charset.StandardCharsets</action>
+      <action issue="CODEC-217" dev="ggregory" type="add" due-to="Gary 
Gregory">Add HmacAlgorithms.HMAC_SHA_224 (Java 8 only)</action>
+      <action issue="CODEC-213" dev="ggregory" type="add" due-to="Gary 
Gregory">Support JEP 287: SHA-3 Hash Algorithms</action>
+      <action issue="CODEC-212" dev="ggregory" type="add" due-to="Gary 
Gregory">Create a minimal Digest command line utility: 
org.apache.commons.codec.digest.Digest</action>
+      <action issue="CODEC-210" dev="ggregory" type="add" due-to="Gary 
Gregory">Add DigestUtils.getDigest(String, MessageDigest)</action>
+      <action issue="CODEC-208" dev="ggregory" type="add" due-to="Gary 
Gregory">Make some DigestUtils APIs public</action>
+      <action issue="CODEC-206" dev="ggregory" type="add" due-to="Gary 
Gregory">Add java.io.File APIs to MessageDigestAlgorithm</action>
+      <action issue="CODEC-183" dev="ggregory" type="add" due-to="Steven 
Wurster">BaseNCodecOutputStream only supports writing EOF on close()</action>
+      <action issue="CODEC-195" dev="ggregory" type="add" due-to="Gary 
Gregory">Support SHA-224 in DigestUtils on Java 8</action>
+      <action issue="CODEC-194" dev="ggregory" type="add" due-to="Gary 
Gregory">Support java.nio.ByteBuffer in 
org.apache.commons.codec.binary.Hex</action>
+      <action issue="CODEC-193" dev="ggregory" type="add" due-to="Michael 
Donaghy">Support java.nio.ByteBuffer in DigestUtils</action>
+      <action issue="CODEC-202" dev="ggregory" type="add" due-to="Oleg 
Kalnichevski">Add BaseNCodec.encode(byte[], int, int) input with offset and 
length parameters for Base64 and Base32.</action>
+      <action issue="CODEC-203" dev="ggregory" type="add" due-to="Gary 
Gregory">Add convenience method decodeHex(String).</action>
+      <action issue="CODEC-205" dev="ggregory" type="add" due-to="Gary 
Gregory">Add faster CRC32 implementation.</action>
+      <action issue="CODEC-224" dev="ggregory" type="add" due-to="Gary 
Gregory">Add convenience API 
org.apache.commons.codec.binary.Hex.encodeHexString(byte[]|ByteBuffer, 
boolean).</action>
+      <action issue="CODEC-242" dev="ggregory" type="add" due-to="Gary 
Gregory">Add Automatic-Module-Name manifest entry for Java 9.</action>
+    </release>
+    <release version="1.10" date="5 November 2014" description="Feature and 
fix release.">
+      <action dev="ggregory" type="add" issue="CODEC-192" due-to="Thomas 
Neidhart">Add Daitch-Mokotoff Soundex</action>
+      <action dev="ggregory" type="add" issue="CODEC-121" due-to="Thomas 
Neidhart, Java John">QuotedPrintableCodec does not support soft line break per 
the 'quoted-printable' example on Wikipedia</action>
+      <action dev="tn" type="fix" issue="CODEC-185" due-to="Sean Busbey">Added 
clarification to Javadoc of Base64 concerning the use of the urlSafe 
parameter</action>
+      <action dev="tn" type="fix" issue="CODEC-191" due-to="Igor Savin">Added 
clarification to the Javadoc of Base[32|64]OutputStream that it is mandatory to 
call close()</action>
+      <action dev="ggregory" type="fix" issue="CODEC-188" due-to="Hendrik 
Saly">Add support for HMAC Message Authentication Code (MAC) digests</action>
+      <action dev="ggregory" type="fix" issue="CODEC-187" due-to="Michael 
Tobias, Thomas Neidhart">Beider Morse Phonetic Matching producing incorrect 
tokens</action>
+      <action dev="ggregory" type="fix" issue="CODEC-184" due-to="Cyrille 
Artho">NullPointerException in DoubleMetaPhone.isDoubleMetaphoneEqual when 
using empty strings</action>
+      <action dev="ggregory" type="add" issue="CODEC-181" due-to="Ivan 
Martinez-Ortiz">Make possible to provide padding byte to BaseNCodec in 
constructor</action>
+      <action dev="ggregory" type="fix" issue="CODEC-180" due-to="Ville 
Skyttä">Fix Javadoc 1.8.0 errors</action>
+      <action dev="ggregory" type="update" issue="CODEC-178">Deprecate 
Charsets Charset constants in favor of Java 7's 
java.nio.charset.StandardCharsets</action>
+      <action dev="ggregory" type="fix" issue="CODEC-189">Fix Java 8 build 
Javadoc errors</action>
+      <action dev="ggregory" type="update" issue="CODEC-190">Update from 
commons-parent 34 to 35</action>
+    </release>
+    <release version="1.9" date="20 December 2013" description="Feature and 
fix release.">
+      <action dev="ggregory" type="update" issue="CODEC-174" due-to="Thomas 
Champagne">Improve performance of Beider Morse encoder</action>
+      <action dev="ggregory" type="fix" issue="CODEC-175">Beider Morse does 
not close Scanners used to read config files</action>
+      <action dev="sebb" type="fix" issue="CODEC-172" due-to="Matt 
Bishop">Base32 decode table has spurious value</action>
+      <action dev="ggregory" type="fix" issue="CODEC-170" due-to="Ron Wheeler, 
Henri Yandell">Link broken in Metaphone Javadoc</action>
+      <action dev="ggregory" type="fix" issue="CODEC-176" due-to="Ville 
Skyttä">Spelling fixes in Javadoc and comments</action>
+    </release>
+    <release version="1.8" date="19 April 2013" description="Feature and fix 
release. Requires a minimum of Java 1.6.">
+      <action dev="ggregory" type="add" issue="CODEC-168" due-to="Daniel 
Cassidy">Add DigestUtils.updateDigest(MessageDigest, InputStream).</action>
+      <action dev="julius" type="add" issue="CODEC-167">Add JUnit to test our 
decode with pad character in the middle.</action>
+      <action dev="ggregory" type="add" issue="CODEC-161" due-to="crice">Add 
Match Rating Approach (MRA) phonetic algorithm encoder.</action>
+      <action dev="ggregory" type="fix" issue="CODEC-163" 
due-to="leo141">ColognePhonetic encoder unnecessarily creates many char arrays 
on every loop run.</action>
+      <action dev="sebb" type="fix" 
issue="CODEC-160">Base64.encodeBase64URLSafeString doesn't add padding 
characters at the end.</action>
+    </release>
+    <release version="1.7" date="11 September 2012" description="Feature and 
fix release. Requires a minimum of Java 1.6.">
+      <action issue="CODEC-157" dev="ggregory" type="add" due-to="ggregory">
+        DigestUtils: Add MD2 APIs.
+      </action>
+      <action issue="CODEC-156" dev="ggregory" type="add" due-to="ggregory">
+        DigestUtils: add APIs named after standard algorithm name SHA-1.
+      </action>
+      <action issue="CODEC-155" dev="ggregory" type="add" due-to="ggregory">
+        DigestUtils.getDigest(String) should throw IllegalArgumentException 
instead of RuntimeException.
+      </action>
+      <action issue="CODEC-153" dev="ggregory" type="add" due-to="ggregory">
+        Create a class MessageDigestAlgorithms to define standard algorithm 
names.
+      </action>
+      <action issue="CODEC-152" dev="ggregory" type="add" due-to="ggregory">
+        DigestUtils.getDigest(String) loses the original exception.
+      </action>
+      <action issue="CODEC-151" dev="ggregory" type="add" due-to="lathspell">
+        Remove unnecessary attempt to fill up the salt variable in UnixCrypt.
+      </action>
+      <action issue="CODEC-150" dev="ggregory" type="add" due-to="lathspell">
+        Remove unnecessary call to Math.abs().
+      </action>
+      <action issue="CODEC-148" dev="ggregory" type="add" due-to="lathspell">
+        More tests and minor things.
+      </action>
+      <action issue="CODEC-146" dev="tn" type="add" due-to="Julius Davies">
+        Added regression tests for PhoneticEngine based on Solr-3.6.0.
+      </action>
+      <action issue="CODEC-147" dev="tn" type="update">
+        BeiderMorseEncoder/PhoneticEngine: make results deterministic by using 
a LinkedHashSet
+        instead of a HashSet.
+      </action>
+      <action issue="CODEC-143" dev="sebb" type="update">
+        StringBuffer could be replaced by StringBuilder for local variables.
+      </action>
+      <action issue="CODEC-139" dev="ggregory" type="add" due-to="dsebastien">
+        DigestUtils: add updateDigest methods and make methods public.
+      </action>
+      <action issue="CODEC-133" dev="ggregory" type="add" due-to="lathspell">
+        Add classes for MD5/SHA1/SHA-512-based Unix crypt(3) hash variants.
+      </action>
+      <action issue="CODEC-96" dev="ggregory" type="fix" due-to="sebb">
+        Base64 encode() method is no longer thread-safe, breaking clients 
using it as a shared BinaryEncoder.
+        Note: the fix breaks binary compatibility, however the changes are to 
a class (BaseNCodec) which is
+        intended for internal use.
+      </action>
+      <action issue="CODEC-138" dev="sebb" type="fix">
+        Complete FilterInputStream interface for BaseNCodecInputStream.
+      </action>
+      <action issue="CODEC-136" dev="ggregory" type="fix">
+        Use Charset objects when possible, create Charsets for required 
character encodings.
+      </action>
+      <action issue="CODEC-132" dev="ggregory" type="fix" due-to="rcmuir">
+        BeiderMorseEncoder OOM issues.
+      </action>
+      <action issue="CODEC-131" dev="tn" type="fix" due-to="smolav">
+        DoubleMetaphone Javadoc contains dead links.
+      </action>
+      <action issue="CODEC-130" dev="ggregory" type="add" due-to="tn">
+        Base64InputStream.skip skips underlying stream, not output.
+      </action>
+      <action issue="CODEC-63" dev="ggregory" type="add" due-to="tn">
+        Implement NYSIIS phonetic encoder.
+      </action>
+    </release>
+    <release version="1.6" date="20 November 2011" description="Feature and 
fix release. Requires a minimum of Java 1.5.">
+      <action dev="ggregory" type="fix" issue="CODEC-129" due-to="ggregory">
+        Use standard Maven directory layout.
+      </action>
+      <action dev="ggregory" type="fix" issue="CODEC-128" 
due-to="[email protected]">
+        Documentation spelling fixes.
+      </action>
+      <action dev="ggregory, sebb" type="fix" issue="CODEC-127">
+        Fix various character encoding issues in comments and test cases.
+      </action>
+      <action dev="ggregory, matthewpocock" type="update" issue="CODEC-125" 
due-to="Matthew Pocock">
+        Implement a Beider-Morse phonetic matching codec.
+      </action>
+      <action dev="ggregory" type="update" issue="CODEC-119">
+        Migrate to Java 5.
+      </action>
+      <action dev="ggregory" type="update" issue="CODEC-120">
+        Migrate to JUnit 4.
+      </action>
+      <action dev="ggregory" type="fix" issue="CODEC-123">
+        ColognePhonetic Javadoc should use HTML entities for special 
characters.
+      </action>
+    </release>
+    <release version="1.5" date="29 March 2011" description="Feature and fix 
release. Requires a minimum of Java 1.4.">
+      <action dev="sebb" type="add" issue="CODEC-88">
+        Added new Base32 encoder.
+      </action>
+      <action dev="sebb" type="fix" issue="CODEC-89">
+        new Base64().encode() appends a CRLF, and chunks results into 76 
character lines.
+      </action>
+      <action dev="sebb" type="fix" issue="CODEC-92">
+        Many test cases use getBytes() which uses the default platform 
encoding so tests may fail on some platforms.
+      </action>
+      <action dev="sebb, julius, ggregory" type="add" issue="CODEC-93" 
due-to="sebb">
+        Add test(s) to check that encodeBase64() does not chunk output.
+      </action>
+      <action dev="sebb" type="fix" issue="CODEC-97" due-to="mjryall">
+        Base64 default constructor behaviour changed to enable chunking in 1.4.
+      </action>
+      <action dev="ggregory" type="fix" issue="CODEC-99" due-to="julius">
+        Base64.encodeBase64String() shouldn't chunk.
+      </action>
+      <action dev="julius" type="fix" issue="CODEC-101" due-to="balusc">
+        Base64InputStream#read(byte[]) incorrectly returns 0 at end of any 
stream which is multiple of 3 bytes long.
+      </action>
+      <action dev="bayard" type="fix" issue="CODEC-103" due-to="gnuf">
+        Typo in DecoderException message thrown from Hex.decodeHex.
+      </action>
+      <action dev="julius, ggregory" type="add" issue="CODEC-105" due-to="zak">
+        ArrayIndexOutOfBoundsException when doing multiple reads() on encoding 
Base64InputStream.
+      </action>
+      <action dev="bayard" type="add" issue="CODEC-106" due-to="it2mmeyerfa">
+        Add the "Kölner Phonetik" encoder (Cologne Phonetic) to codec.lang.
+      </action>
+      <action dev="ggregory" type="add" issue="CODEC-111" due-to="ggregory">
+        org.apache.commons.codec.net.URLCodec.ESCAPE_CHAR isn't final but 
should be.
+      </action>
+      <action dev="sebb" type="add" issue="CODEC-112" due-to="sebb">
+        Base64.encodeBase64(byte[] binaryData, boolean isChunked, boolean 
urlSafe, int maxResultSize) throws IAE for valid maxResultSize if isChunked is 
false.
+      </action>
+      <action dev="ggregory" type="add" issue="CODEC-113" due-to="ggregory">
+        org.apache.commons.codec.language.RefinedSoundex.US_ENGLISH_MAPPING 
should be package protected MALICIOUS_CODE.
+      </action>
+      <action dev="ggregory" type="add" issue="CODEC-114" due-to="ggregory">
+        org.apache.commons.codec.language.Soundex.US_ENGLISH_MAPPING should be 
package protected MALICIOUS_CODE.
+      </action>
+      <action dev="ggregory" type="add" issue="CODEC-115" due-to="sebb">
+        DoubleMetaphone.maxCodeLen should probably be private.
+      </action>
+      <action dev="ggregory" type="remove" issue="CODEC-116" due-to="ggregory">
+        Remove deprecated package private method 
Base64.discardWhitespace(byte[])
+      </action>
+      <action dev="ggregory" type="fix" issue="CODEC-117" due-to="ggregory">
+        Caverphone encodes names starting and ending with "mb" incorrectly.
+      </action>
+      <action dev="ggregory" type="add" issue="CODEC-118" due-to="ggregory">
+        Split Caverphone class into two classes for Caverphone 1.0 and 2.0.
+      </action>
+    </release>
+    <release version="1.4" date="9 August 2009" description="Feature and fix 
release. Requires a minimum of Java 1.4.">
+      <action dev="ggregory" type="fix" issue="CODEC-80" due-to="Julius 
Davies">
+        Regression: Base64.encode(chunk=true) has bug when input length is 
multiple of 76.
+      </action>
+      <action dev="ggregory" type="fix" issue="CODEC-77" due-to="Julius 
Davies">
+        Base64 bug with empty input (new byte[0]).
+      </action>
+      <action dev="ggregory" type="fix" issue="CODEC-75" due-to="Julius 
Davies">
+        Make Base64 URL-safe.
+      </action>
+      <action dev="ggregory" type="add" issue="CODEC-74">
+        Allow for uppercase letters output in Hex.encodeHex().
+      </action>
+      <action dev="bayard" type="fix" issue="CODEC-72" due-to="Sebb">
+        Soundex and RefinedSoundex issues with character arrays.
+      </action>
+      <action dev="bayard" type="fix" issue="CODEC-71" due-to="Sebb">
+        Base64.isArrayByteBase64() method is inefficient for large byte arrays.
+      </action>
+      <action dev="bayard" type="fix" issue="CODEC-70" due-to="Sebb">
+        Thread safety and malicious code safety improvements.
+      </action>
+      <action dev="bayard" type="fix" issue="CODEC-68" due-to="Robert 
Rodewald">
+        isBase64 throws ArrayIndexOutOfBoundsException on some non-BASE64 
bytes.
+      </action>
+      <action dev="bayard" type="fix" issue="CODEC-65" due-to="Benjamin 
Bentmann">
+        Fix case-insensitive string handling.
+      </action>
+      <action dev="ggregory" type="fix" issue="CODEC-61" due-to="Igor 
Slepchin">
+        Base64.encodeBase64() throws NegativeArraySizeException on large files.
+      </action>
+      <action dev="bayard" type="add" issue="CODEC-60">
+        Implement Caverphone.
+      </action>
+      <action dev="ggregory" type="fix" issue="CODEC-58" due-to="Julius 
Davies">
+        Character set used by Base64 not documented.
+      </action>
+      <action dev="bayard" type="fix" issue="CODEC-56" due-to="Sebb">
+        RefinedSoundex creates instance before all fields have been initialized.
+      </action>
+      <action dev="bayard" type="add" issue="CODEC-52" due-to="Niklas 
Gustavsson">
+        Digest on InputStreams.
+      </action>
+      <action dev="bayard" type="fix" issue="CODEC-51">
+        2 Test failures in SoundexTest.
+      </action>
+      <action dev="bayard" type="add" issue="CODEC-40" due-to="Chris Black">
+        Patch to add crypto-compatible BigInteger encoding support to Base64.
+      </action>
+      <action dev="ggregory" type="add" issue="CODEC-39" due-to="Jamie 
Flournoy" due-to-email="[email protected]">
+        DigestUtils: Add methods for SHA-256, SHA-384, and SHA-512.
+      </action>
+      <action dev="tobrien" type="fix" issue="CODEC-10" due-to="Reggie Riser" 
due-to-email="[email protected]">
+        Using US_ENGLISH in Soundex caused a NullPointerException.
+      </action>
+      <action dev="tobrien" type="fix" issue="CODEC-6" due-to="David Tonhofer">
+        Source tarball spews files all over the place.
+      </action>
+      <action dev="ggregory" type="fix" issue="CODEC-22" due-to="Piero 
Ottuzzi">
+        Base64.isArrayByteBase64() throws an ArrayIndexOutOfBoundsException 
for negative octets
+      </action>
+      <action dev="jochen" type="add" issue="CODEC-69" due-to="Julius Davies">
+        Streaming Base64 (Base64InputStream and Base64OutputStream added).
+      </action>
+      <action dev="ggregory" type="fix" issue="CODEC-78" due-to="Julius 
Davies">
+        Base64: Improve Code Coverage.
+      </action>
+      <action dev="ggregory" type="add" issue="CODEC-59" due-to="Julius 
Davies">
+        Add methods to Base64 which work with String instead of byte[].
+      </action>
+      <action dev="ggregory" type="fix" issue="CODEC-81" due-to="Julius 
Davies">
+        Base64's new constructor parameters ignored.
+      </action>
+      <action dev="niallp" type="fix" issue="CODEC-83">
+        Improve Double Metaphone test coverage.
+      </action>
+      <action dev="niallp" type="fix" issue="CODEC-84">
+        Double Metaphone bugs in alternative encoding.
+      </action>
+      <action dev="ggregory" type="fix" issue="CODEC-73" due-to="Benjamin 
Bentmann">
+        Make string2byte conversions independent of platform default encoding.
+      </action>
+    </release>
+    <release version="1.3" date="10 July 2004" description="Feature and fix 
release.">
+      <action dev="ggregory, tobrien" type="add" issue="CODEC-21" due-to="Alex 
Karasulu">
+        BinaryCodec: Encodes and decodes binary to and from Strings of 0s and 
1s.
+      </action>
+      <action dev="ggregory" type="add" issue="CODEC-41" due-to="Oleg 
Kalnichevski" due-to-email="[email protected]">
+        QuotedPrintableCodec: Codec for RFC 1521 MIME (Multipurpose Internet
+        Mail Extensions) Part One. Rules #3, #4, and #5 of the 
quoted-printable spec
+        are not implemented yet. See also issue CODEC-46.
+      </action>
+      <action dev="ggregory" type="add" issue="CODEC-41" due-to="Oleg 
Kalnichevski" due-to-email="[email protected]">
+        BCodec: Identical to the Base64 encoding defined by RFC 1521 and 
allows a
+        character set to be specified.
+      </action>
+      <action dev="ggregory" type="add" issue="CODEC-41" due-to="Oleg 
Kalnichevski" due-to-email="[email protected]">
+        QCodec: Similar to the Quoted-Printable content-transfer-encoding 
defined
+        in RFC 1521 and designed to allow text containing mostly ASCII 
characters to
+        be decipherable on an ASCII terminal without decoding.
+      </action>
+      <action dev="ggregory" type="add" issue="CODEC-45" due-to="Matthew 
Inger" due-to-email="[email protected]">
+        Soundex: Implemented the DIFFERENCE algorithm.
+      </action>
+      <action dev="ggregory" type="add" issue="CODEC-45" due-to="Matthew 
Inger" due-to-email="[email protected]">
+        RefinedSoundex: Implemented the DIFFERENCE algorithm.
+      </action>
+      <action dev="ggregory" type="update">
+        This version is released under the
+        &lt;a href="http://www.apache.org/licenses/LICENSE-2.0">Apache License 
2.0&lt;/a>
+        , please see LICENSE.txt. Previous versions were released under the
+        &lt;a href="http://www.apache.org/licenses/LICENSE-1.1">Apache License 
1.1&lt;/a>
+      </action>
+      <action dev="ggregory" type="update">
+        The Board recommendation to remove Javadoc author tags has been
+        implemented. All author tags are now "Apache Software Foundation".
+      </action>
+      <action dev="ggregory" type="fix" issue="CODEC-25" due-to="Oleg 
Kalnichevski" due-to-email="[email protected]">
+        The default URL encoding logic was broken.
+      </action>
+      <action dev="ggregory" type="fix" issue="CODEC-31" due-to="Gary D. 
Gregory">
+        Base64 chunked encoding not compliant with RFC 2045 section 2.1 CRLF.
+      </action>
+      <action dev="ggregory" type="fix" issue="CODEC-5">
+        Hex converts illegal characters to 255.
+      </action>
+      <action dev="tobrien" type="fix" issue="CODEC-17">
+        Metaphone now correctly handles a silent B in a word that ends in MB.
+        "COMB" is encoded as "KM", before this fix "COMB" was encoded as "KMB".
+      </action>
+      <action dev="ggregory" type="fix">
+        Added missing tags in Javadoc comments.
+      </action>
+      <action dev="ggregory" type="fix">
+        General Javadoc improvements.
+      </action>
+    </release>
+    <release version="1.2" date="24 Nov 2003" description="Feature and fix 
release.">
+      <action dev="tobrien" type="add" due-to="Oleg Kalnichevski" 
due-to-email="[email protected]">
+        URLCodec: Implements the www-form-urlencoded encoding scheme.
+      </action>
+      <action dev="tobrien" type="add" due-to="Dave Dribin, David Graham">
+        DigestUtils: Calculates MD5 and SHA digests.
+      </action>
+      <action dev="tobrien" type="fix" issue="CODEC-26" due-to="Brian Ewins">
+        Modified Base64 to remedy non-compliance with RFC
+        2045. Non-Base64 characters were not being discarded during the
+        decode. RFC 2045 explicitly states that all characters outside of the
+        base64 alphabet are to be ignored.
+      </action>
+      <action dev="ggregory" type="fix" issue="CODEC-4">
+        Hex.decode(Object) throws a ClassCastException when a String argument 
is passed in.
+      </action>
+      <action dev="ggregory" type="fix" issue="CODEC-3">
+        Soundex: The HW rule is not applied; hyphens and apostrophes are not 
ignored.
+      </action>
+      <action dev="ggregory" type="fix" issue="CODEC-29">
+        Soundex.setMaxLength causes bugs and is not needed.
+        Calling Soundex.setMaxLength() with a value of 2 or less causes the 
wrong
+        answer to be returned. Since the encoding returned by Soundex is always
+        of length 4 by definition (we do not use the '-' in as a letter-nnn
+        separator) the need for a maxLength attribute is not needed. Deprecate
+        the field and accessor methods.
+      </action>
+      <action dev="Members" type="fix">
+        Fix in Metaphone relating to the handling of the maximum code length.
+      </action>
+    </release>
+    <release version="1.1" date="29 April 2003"
+      description="The first official release. Numerous projects had been 
depending on version 1.0-dev while in the Sandbox.">
+      <action dev="Members" type="add">
+        A newer version of the Base64 class reflecting improvements from
+        both the commons-httpclient and xml-rpc versions of code forked
+        from catalina.
+      </action>
+      <action dev="Members" type="add">
+        Base64 class from commons-httpclient in org.apache.commons.codec.base64
+        has been retained for backwards compatibility but has been deprecated.
+      </action>
+      <action dev="Members" type="add">
+        Soundex class from commons-util in org.apache.commons.codec.
+      </action>
+      <action dev="Members" type="add">
+        Metaphone class from commons-util in org.apache.commons.codec.
+      </action>
+      <action dev="tobrien" type="add">
+        RefinedSoundex class in org.apache.commons.codec.
+      </action>
+      <action dev="Members" type="add">
+        Encoder/Decoder interfaces in org.apache.commons.
+      </action>
+      <action dev="Members" type="add">
+        String and Binary specific Encoder/Decoder interfaces in 
org.apache.commons.
+      </action>
+      <action dev="Members" type="add">
+        StringEncoderComparator replaces the SoundexComparator from the 
language package.
+      </action>
+      <action dev="Members" type="fix">
+        Base64 now discards whitespace characters when decoding encoded 
content.
+      </action>
+    </release>
+    <release version="1.0-dev" date="25 April 2003" description="Last release 
from the Sandbox.">
+      <action dev="tobrien" type="add">
+        Base64 class from commons-httpclient in 
org.apache.commons.codec.base64.
+      </action>
+      <action dev="tobrien" type="add">
+        Soundex class from commons-util in org.apache.commons.codec.
+      </action>
+      <action dev="tobrien" type="add">
+        Metaphone class from commons-util in org.apache.commons.codec.
+      </action>
+      <action dev="Members" type="add">
+        SoundexComparator class from commons-util in org.apache.commons.codec.
+      </action>
+    </release>
+  </body>
+</document>
diff --git 
a/src/main/java/org/apache/commons/codec/language/DaitchMokotoffSoundex.java 
b/src/main/java/org/apache/commons/codec/language/DaitchMokotoffSoundex.java
index 692e0df..96ea4ed 100644
--- a/src/main/java/org/apache/commons/codec/language/DaitchMokotoffSoundex.java
+++ b/src/main/java/org/apache/commons/codec/language/DaitchMokotoffSoundex.java
@@ -1,553 +1,553 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.commons.codec.language;
-
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.Comparator;
-import java.util.HashMap;
-import java.util.LinkedHashSet;
-import java.util.List;
-import java.util.Map;
-import java.util.Scanner;
-import java.util.Set;
-
-import org.apache.commons.codec.CharEncoding;
-import org.apache.commons.codec.EncoderException;
-import org.apache.commons.codec.Resources;
-import org.apache.commons.codec.StringEncoder;
-
-/**
- * Encodes a string into a Daitch-Mokotoff Soundex value.
- * <p>
- * The Daitch-Mokotoff Soundex algorithm is a refinement of the Russel and 
American Soundex algorithms, yielding greater
- * accuracy in matching especially Slavish and Yiddish surnames with similar 
pronunciation but differences in spelling.
- * </p>
- * <p>
- * The main differences compared to the other soundex variants are:
- * </p>
- * <ul>
- * <li>coded names are 6 digits long
- * <li>the initial character of the name is coded
- * <li>rules to encoded multi-character n-grams
- * <li>multiple possible encodings for the same name (branching)
- * </ul>
- * <p>
- * This implementation supports branching, depending on the used method:
- * <ul>
- * <li>{@link #encode(String)} - branching disabled, only the first code will 
be returned
- * <li>{@link #soundex(String)} - branching enabled, all codes will be 
returned, separated by '|'
- * </ul>
- * <p>
- * Note: this implementation has additional branching rules compared to the 
original description of the algorithm. The
- * rules can be customized by overriding the default rules contained in the 
resource file
- * {@code org/apache/commons/codec/language/dmrules.txt}.
- * </p>
- * <p>
- * This class is thread-safe.
- * </p>
- *
- * @see Soundex
- * @see <a href="http://en.wikipedia.org/wiki/Daitch%E2%80%93Mokotoff_Soundex"> Wikipedia - Daitch-Mokotoff Soundex</a>
- * @see <a href="http://www.avotaynu.com/soundex.htm">Avotaynu - Soundexing and Genealogy</a>
- *
- * @version $Id$
- * @since 1.10
- */
-public class DaitchMokotoffSoundex implements StringEncoder {
-
-    /**
-     * Inner class representing a branch during DM soundex encoding.
-     */
-    private static final class Branch {
-        private final StringBuilder builder;
-        private String cachedString;
-        private String lastReplacement;
-
-        private Branch() {
-            builder = new StringBuilder();
-            lastReplacement = null;
-            cachedString = null;
-        }
-
-        /**
-         * Creates a new branch, identical to this branch.
-         *
-         * @return a new, identical branch
-         */
-        public Branch createBranch() {
-            final Branch branch = new Branch();
-            branch.builder.append(toString());
-            branch.lastReplacement = this.lastReplacement;
-            return branch;
-        }
-
-        @Override
-        public boolean equals(final Object other) {
-            if (this == other) {
-                return true;
-            }
-            if (!(other instanceof Branch)) {
-                return false;
-            }
-
-            return toString().equals(((Branch) other).toString());
-        }
-
-        /**
-         * Finish this branch by appending '0's until the maximum code length 
has been reached.
-         */
-        public void finish() {
-            while (builder.length() < MAX_LENGTH) {
-                builder.append('0');
-                cachedString = null;
-            }
-        }
-
-        @Override
-        public int hashCode() {
-            return toString().hashCode();
-        }
-
-        /**
-         * Process the next replacement to be added to this branch.
-         *
-         * @param replacement
-         *            the next replacement to append
-         * @param forceAppend
-         *            indicates if the default processing shall be overridden
-         */
-        public void processNextReplacement(final String replacement, final 
boolean forceAppend) {
-            final boolean append = lastReplacement == null || 
!lastReplacement.endsWith(replacement) || forceAppend;
-
-            if (append && builder.length() < MAX_LENGTH) {
-                builder.append(replacement);
-                // remove all characters after the maximum length
-                if (builder.length() > MAX_LENGTH) {
-                    builder.delete(MAX_LENGTH, builder.length());
-                }
-                cachedString = null;
-            }
-
-            lastReplacement = replacement;
-        }
-
-        @Override
-        public String toString() {
-            if (cachedString == null) {
-                cachedString = builder.toString();
-            }
-            return cachedString;
-        }
-    }
-
-    /**
-     * Inner class for storing rules.
-     */
-    private static final class Rule {
-        private final String pattern;
-        private final String[] replacementAtStart;
-        private final String[] replacementBeforeVowel;
-        private final String[] replacementDefault;
-
-        protected Rule(final String pattern, final String replacementAtStart, 
final String replacementBeforeVowel,
-                final String replacementDefault) {
-            this.pattern = pattern;
-            this.replacementAtStart = replacementAtStart.split("\\|");
-            this.replacementBeforeVowel = replacementBeforeVowel.split("\\|");
-            this.replacementDefault = replacementDefault.split("\\|");
-        }
-
-        public int getPatternLength() {
-            return pattern.length();
-        }
-
-        public String[] getReplacements(final String context, final boolean 
atStart) {
-            if (atStart) {
-                return replacementAtStart;
-            }
-
-            final int nextIndex = getPatternLength();
-            final boolean nextCharIsVowel = nextIndex < context.length() ? 
isVowel(context.charAt(nextIndex)) : false;
-            if (nextCharIsVowel) {
-                return replacementBeforeVowel;
-            }
-
-            return replacementDefault;
-        }
-
-        private boolean isVowel(final char ch) {
-            return ch == 'a' || ch == 'e' || ch == 'i' || ch == 'o' || ch == 
'u';
-        }
-
-        public boolean matches(final String context) {
-            return context.startsWith(pattern);
-        }
-
-        @Override
-        public String toString() {
-            return String.format("%s=(%s,%s,%s)", pattern, 
Arrays.asList(replacementAtStart),
-                    Arrays.asList(replacementBeforeVowel), 
Arrays.asList(replacementDefault));
-        }
-    }
-
-    private static final String COMMENT = "//";
-    private static final String DOUBLE_QUOTE = "\"";
-
-    private static final String MULTILINE_COMMENT_END = "*/";
-
-    private static final String MULTILINE_COMMENT_START = "/*";
-
-    /** The resource file containing the replacement and folding rules */
-    private static final String RESOURCE_FILE = 
"org/apache/commons/codec/language/dmrules.txt";
-
-    /** The code length of a DM soundex value. */
-    private static final int MAX_LENGTH = 6;
-
-    /** Transformation rules indexed by the first character of their pattern. 
*/
-    private static final Map<Character, List<Rule>> RULES = new HashMap<>();
-
-    /** Folding rules. */
-    private static final Map<Character, Character> FOLDINGS = new HashMap<>();
-
-    static {
-        try (final Scanner scanner = new 
Scanner(Resources.getInputStream(RESOURCE_FILE), CharEncoding.UTF_8)) {
-            parseRules(scanner, RESOURCE_FILE, RULES, FOLDINGS);
-        }
-
-        // sort RULES by pattern length in descending order
-        for (final Map.Entry<Character, List<Rule>> rule : RULES.entrySet()) {
-            final List<Rule> ruleList = rule.getValue();
-            Collections.sort(ruleList, new Comparator<Rule>() {
-                @Override
-                public int compare(final Rule rule1, final Rule rule2) {
-                    return rule2.getPatternLength() - rule1.getPatternLength();
-                }
-            });
-        }
-    }
-
-    private static void parseRules(final Scanner scanner, final String 
location,
-            final Map<Character, List<Rule>> ruleMapping, final Map<Character, 
Character> asciiFoldings) {
-        int currentLine = 0;
-        boolean inMultilineComment = false;
-
-        while (scanner.hasNextLine()) {
-            currentLine++;
-            final String rawLine = scanner.nextLine();
-            String line = rawLine;
-
-            if (inMultilineComment) {
-                if (line.endsWith(MULTILINE_COMMENT_END)) {
-                    inMultilineComment = false;
-                }
-                continue;
-            }
-
-            if (line.startsWith(MULTILINE_COMMENT_START)) {
-                inMultilineComment = true;
-            } else {
-                // discard comments
-                final int cmtI = line.indexOf(COMMENT);
-                if (cmtI >= 0) {
-                    line = line.substring(0, cmtI);
-                }
-
-                // trim leading-trailing whitespace
-                line = line.trim();
-
-                if (line.length() == 0) {
-                    continue; // empty lines can be safely skipped
-                }
-
-                if (line.contains("=")) {
-                    // folding
-                    final String[] parts = line.split("=");
-                    if (parts.length != 2) {
-                        throw new IllegalArgumentException("Malformed folding 
statement split into " + parts.length +
-                                " parts: " + rawLine + " in " + location);
-                    }
-                    final String leftCharacter = parts[0];
-                    final String rightCharacter = parts[1];
-
-                    if (leftCharacter.length() != 1 || rightCharacter.length() 
!= 1) {
-                        throw new IllegalArgumentException("Malformed folding 
statement - " +
-                                "patterns are not single characters: " + 
rawLine + " in " + location);
-                    }
-
-                    asciiFoldings.put(leftCharacter.charAt(0), 
rightCharacter.charAt(0));
-                } else {
-                    // rule
-                    final String[] parts = line.split("\\s+");
-                    if (parts.length != 4) {
-                        throw new IllegalArgumentException("Malformed rule 
statement split into " + parts.length +
-                                " parts: " + rawLine + " in " + location);
-                    }
-                    try {
-                        final String pattern = stripQuotes(parts[0]);
-                        final String replacement1 = stripQuotes(parts[1]);
-                        final String replacement2 = stripQuotes(parts[2]);
-                        final String replacement3 = stripQuotes(parts[3]);
-
-                        final Rule r = new Rule(pattern, replacement1, 
replacement2, replacement3);
-                        final char patternKey = r.pattern.charAt(0);
-                        List<Rule> rules = ruleMapping.get(patternKey);
-                        if (rules == null) {
-                            rules = new ArrayList<>();
-                            ruleMapping.put(patternKey, rules);
-                        }
-                        rules.add(r);
-                    } catch (final IllegalArgumentException e) {
-                        throw new IllegalStateException(
-                                "Problem parsing line '" + currentLine + "' in 
" + location, e);
-                    }
-                }
-            }
-        }
-    }
-
-    private static String stripQuotes(String str) {
-        if (str.startsWith(DOUBLE_QUOTE)) {
-            str = str.substring(1);
-        }
-
-        if (str.endsWith(DOUBLE_QUOTE)) {
-            str = str.substring(0, str.length() - 1);
-        }
-
-        return str;
-    }
-
-    /** Whether to use ASCII folding prior to encoding. */
-    private final boolean folding;
-
-    /**
-     * Creates a new instance with ASCII-folding enabled.
-     */
-    public DaitchMokotoffSoundex() {
-        this(true);
-    }
-
-    /**
-     * Creates a new instance.
-     * <p>
-     * With ASCII-folding enabled, certain accented characters will be 
transformed to equivalent ASCII characters, e.g.
-     * è -&gt; e.
-     * </p>
-     *
-     * @param folding
-     *            if ASCII-folding shall be performed before encoding
-     */
-    public DaitchMokotoffSoundex(final boolean folding) {
-        this.folding = folding;
-    }
-
-    /**
-     * Performs a cleanup of the input string before the actual soundex 
transformation.
-     * <p>
-     * Removes all whitespace characters and performs ASCII folding if enabled.
-     * </p>
-     *
-     * @param input
-     *            the input string to cleanup
-     * @return a cleaned up string
-     */
-    private String cleanup(final String input) {
-        final StringBuilder sb = new StringBuilder();
-        for (char ch : input.toCharArray()) {
-            if (Character.isWhitespace(ch)) {
-                continue;
-            }
-
-            ch = Character.toLowerCase(ch);
-            if (folding && FOLDINGS.containsKey(ch)) {
-                ch = FOLDINGS.get(ch);
-            }
-            sb.append(ch);
-        }
-        return sb.toString();
-    }
-
-    /**
-     * Encodes an Object using the Daitch-Mokotoff soundex algorithm without 
branching.
-     * <p>
-     * This method is provided in order to satisfy the requirements of the 
Encoder interface, and will throw an
-     * EncoderException if the supplied object is not of type java.lang.String.
-     * </p>
-     *
-     * @see #soundex(String)
-     *
-     * @param obj
-     *            Object to encode
-     * @return An object (of type java.lang.String) containing the DM soundex 
code, which corresponds to the String
-     *         supplied.
-     * @throws EncoderException
-     *             if the parameter supplied is not of type java.lang.String
-     * @throws IllegalArgumentException
-     *             if a character is not mapped
-     */
-    @Override
-    public Object encode(final Object obj) throws EncoderException {
-        if (!(obj instanceof String)) {
-            throw new EncoderException(
-                    "Parameter supplied to DaitchMokotoffSoundex encode is not 
of type java.lang.String");
-        }
-        return encode((String) obj);
-    }
-
-    /**
-     * Encodes a String using the Daitch-Mokotoff soundex algorithm without 
branching.
-     *
-     * @see #soundex(String)
-     *
-     * @param source
-     *            A String object to encode
-     * @return A DM Soundex code corresponding to the String supplied
-     * @throws IllegalArgumentException
-     *             if a character is not mapped
-     */
-    @Override
-    public String encode(final String source) {
-        if (source == null) {
-            return null;
-        }
-        return soundex(source, false)[0];
-    }
-
-    /**
-     * Encodes a String using the Daitch-Mokotoff soundex algorithm with 
branching.
-     * <p>
-     * In case a string is encoded into multiple codes (see branching rules), 
the result will contain all codes,
-     * separated by '|'.
-     * </p>
-     * <p>
-     * Example: the name "AUERBACH" is encoded as both
-     * </p>
-     * <ul>
-     * <li>097400</li>
-     * <li>097500</li>
-     * </ul>
-     * <p>
-     * Thus the result will be "097400|097500".
-     * </p>
-     *
-     * @param source
-     *            A String object to encode
-     * @return A string containing a set of DM Soundex codes corresponding to 
the String supplied
-     * @throws IllegalArgumentException
-     *             if a character is not mapped
-     */
-    public String soundex(final String source) {
-        final String[] branches = soundex(source, true);
-        final StringBuilder sb = new StringBuilder();
-        int index = 0;
-        for (final String branch : branches) {
-            sb.append(branch);
-            if (++index < branches.length) {
-                sb.append('|');
-            }
-        }
-        return sb.toString();
-    }
-
-    /**
-     * Perform the actual DM Soundex algorithm on the input string.
-     *
-     * @param source
-     *            A String object to encode
-     * @param branching
-     *            If branching shall be performed
-     * @return A string array containing all DM Soundex codes corresponding to 
the String supplied depending on the
-     *         selected branching mode
-     */
-    private String[] soundex(final String source, final boolean branching) {
-        if (source == null) {
-            return null;
-        }
-
-        final String input = cleanup(source);
-
-        final Set<Branch> currentBranches = new LinkedHashSet<>();
-        currentBranches.add(new Branch());
-
-        char lastChar = '\0';
-        for (int index = 0; index < input.length(); index++) {
-            final char ch = input.charAt(index);
-
-            // ignore whitespace inside a name
-            if (Character.isWhitespace(ch)) {
-                continue;
-            }
-
-            final String inputContext = input.substring(index);
-            final List<Rule> rules = RULES.get(ch);
-            if (rules == null) {
-                continue;
-            }
-
-            // use an EMPTY_LIST to avoid false positive warnings wrt 
potential null pointer access
-            final List<Branch> nextBranches = branching ? new 
ArrayList<Branch>() : Collections.<Branch>emptyList();
-
-            for (final Rule rule : rules) {
-                if (rule.matches(inputContext)) {
-                    if (branching) {
-                        nextBranches.clear();
-                    }
-                    final String[] replacements = 
rule.getReplacements(inputContext, lastChar == '\0');
-                    final boolean branchingRequired = replacements.length > 1 
&& branching;
-
-                    for (final Branch branch : currentBranches) {
-                        for (final String nextReplacement : replacements) {
-                            // if we have multiple replacements, always create 
a new branch
-                            final Branch nextBranch = branchingRequired ? 
branch.createBranch() : branch;
-
-                            // special rule: occurrences of mn or nm are 
treated differently
-                            final boolean force = (lastChar == 'm' && ch == 
'n') || (lastChar == 'n' && ch == 'm');
-
-                            nextBranch.processNextReplacement(nextReplacement, 
force);
-
-                            if (branching) {
-                                nextBranches.add(nextBranch);
-                            } else {
-                                break;
-                            }
-                        }
-                    }
-
-                    if (branching) {
-                        currentBranches.clear();
-                        currentBranches.addAll(nextBranches);
-                    }
-                    index += rule.getPatternLength() - 1;
-                    break;
-                }
-            }
-
-            lastChar = ch;
-        }
-
-        final String[] result = new String[currentBranches.size()];
-        int index = 0;
-        for (final Branch branch : currentBranches) {
-            branch.finish();
-            result[index++] = branch.toString();
-        }
-
-        return result;
-    }
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.codec.language;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Scanner;
+import java.util.Set;
+
+import org.apache.commons.codec.CharEncoding;
+import org.apache.commons.codec.EncoderException;
+import org.apache.commons.codec.Resources;
+import org.apache.commons.codec.StringEncoder;
+
+/**
+ * Encodes a string into a Daitch-Mokotoff Soundex value.
+ * <p>
+ * The Daitch-Mokotoff Soundex algorithm is a refinement of the Russel and 
American Soundex algorithms, yielding greater
+ * accuracy in matching especially Slavish and Yiddish surnames with similar 
pronunciation but differences in spelling.
+ * </p>
+ * <p>
+ * The main differences compared to the other soundex variants are:
+ * </p>
+ * <ul>
+ * <li>coded names are 6 digits long
+ * <li>the initial character of the name is coded
+ * <li>rules to encoded multi-character n-grams
+ * <li>multiple possible encodings for the same name (branching)
+ * </ul>
+ * <p>
+ * This implementation supports branching, depending on the used method:
+ * <ul>
+ * <li>{@link #encode(String)} - branching disabled, only the first code will 
be returned
+ * <li>{@link #soundex(String)} - branching enabled, all codes will be 
returned, separated by '|'
+ * </ul>
+ * <p>
+ * Note: this implementation has additional branching rules compared to the 
original description of the algorithm. The
+ * rules can be customized by overriding the default rules contained in the 
resource file
+ * {@code org/apache/commons/codec/language/dmrules.txt}.
+ * </p>
+ * <p>
+ * This class is thread-safe.
+ * </p>
+ *
+ * @see Soundex
+ * @see <a href="http://en.wikipedia.org/wiki/Daitch%E2%80%93Mokotoff_Soundex"> Wikipedia - Daitch-Mokotoff Soundex</a>
+ * @see <a href="http://www.avotaynu.com/soundex.htm">Avotaynu - Soundexing and Genealogy</a>
+ *
+ * @version $Id$
+ * @since 1.10
+ */
+public class DaitchMokotoffSoundex implements StringEncoder {
+
+    /**
+     * Inner class representing a branch during DM soundex encoding.
+     */
+    private static final class Branch {
+        private final StringBuilder builder;
+        private String cachedString;
+        private String lastReplacement;
+
+        private Branch() {
+            builder = new StringBuilder();
+            lastReplacement = null;
+            cachedString = null;
+        }
+
+        /**
+         * Creates a new branch, identical to this branch.
+         *
+         * @return a new, identical branch
+         */
+        public Branch createBranch() {
+            final Branch branch = new Branch();
+            branch.builder.append(toString());
+            branch.lastReplacement = this.lastReplacement;
+            return branch;
+        }
+
+        @Override
+        public boolean equals(final Object other) {
+            if (this == other) {
+                return true;
+            }
+            if (!(other instanceof Branch)) {
+                return false;
+            }
+
+            return toString().equals(((Branch) other).toString());
+        }
+
+        /**
+         * Finish this branch by appending '0's until the maximum code length 
has been reached.
+         */
+        public void finish() {
+            while (builder.length() < MAX_LENGTH) {
+                builder.append('0');
+                cachedString = null;
+            }
+        }
+
+        @Override
+        public int hashCode() {
+            return toString().hashCode();
+        }
+
+        /**
+         * Process the next replacement to be added to this branch.
+         *
+         * @param replacement
+         *            the next replacement to append
+         * @param forceAppend
+         *            indicates if the default processing shall be overridden
+         */
+        public void processNextReplacement(final String replacement, final 
boolean forceAppend) {
+            final boolean append = lastReplacement == null || 
!lastReplacement.endsWith(replacement) || forceAppend;
+
+            if (append && builder.length() < MAX_LENGTH) {
+                builder.append(replacement);
+                // remove all characters after the maximum length
+                if (builder.length() > MAX_LENGTH) {
+                    builder.delete(MAX_LENGTH, builder.length());
+                }
+                cachedString = null;
+            }
+
+            lastReplacement = replacement;
+        }
+
+        @Override
+        public String toString() {
+            if (cachedString == null) {
+                cachedString = builder.toString();
+            }
+            return cachedString;
+        }
+    }
+
+    /**
+     * Inner class for storing rules.
+     */
+    private static final class Rule {
+        private final String pattern;
+        private final String[] replacementAtStart;
+        private final String[] replacementBeforeVowel;
+        private final String[] replacementDefault;
+
+        protected Rule(final String pattern, final String replacementAtStart, 
final String replacementBeforeVowel,
+                final String replacementDefault) {
+            this.pattern = pattern;
+            this.replacementAtStart = replacementAtStart.split("\\|");
+            this.replacementBeforeVowel = replacementBeforeVowel.split("\\|");
+            this.replacementDefault = replacementDefault.split("\\|");
+        }
+
+        public int getPatternLength() {
+            return pattern.length();
+        }
+
+        public String[] getReplacements(final String context, final boolean 
atStart) {
+            if (atStart) {
+                return replacementAtStart;
+            }
+
+            final int nextIndex = getPatternLength();
+            final boolean nextCharIsVowel = nextIndex < context.length() ? 
isVowel(context.charAt(nextIndex)) : false;
+            if (nextCharIsVowel) {
+                return replacementBeforeVowel;
+            }
+
+            return replacementDefault;
+        }
+
+        private boolean isVowel(final char ch) {
+            return ch == 'a' || ch == 'e' || ch == 'i' || ch == 'o' || ch == 
'u';
+        }
+
+        public boolean matches(final String context) {
+            return context.startsWith(pattern);
+        }
+
+        @Override
+        public String toString() {
+            return String.format("%s=(%s,%s,%s)", pattern, 
Arrays.asList(replacementAtStart),
+                    Arrays.asList(replacementBeforeVowel), 
Arrays.asList(replacementDefault));
+        }
+    }
+
+    private static final String COMMENT = "//";
+    private static final String DOUBLE_QUOTE = "\"";
+
+    private static final String MULTILINE_COMMENT_END = "*/";
+
+    private static final String MULTILINE_COMMENT_START = "/*";
+
+    /** The resource file containing the replacement and folding rules */
+    private static final String RESOURCE_FILE = 
"org/apache/commons/codec/language/dmrules.txt";
+
+    /** The code length of a DM soundex value. */
+    private static final int MAX_LENGTH = 6;
+
+    /** Transformation rules indexed by the first character of their pattern. 
*/
+    private static final Map<Character, List<Rule>> RULES = new HashMap<>();
+
+    /** Folding rules. */
+    private static final Map<Character, Character> FOLDINGS = new HashMap<>();
+
+    static {
+        try (final Scanner scanner = new 
Scanner(Resources.getInputStream(RESOURCE_FILE), CharEncoding.UTF_8)) {
+            parseRules(scanner, RESOURCE_FILE, RULES, FOLDINGS);
+        }
+
+        // sort RULES by pattern length in descending order
+        for (final Map.Entry<Character, List<Rule>> rule : RULES.entrySet()) {
+            final List<Rule> ruleList = rule.getValue();
+            Collections.sort(ruleList, new Comparator<Rule>() {
+                @Override
+                public int compare(final Rule rule1, final Rule rule2) {
+                    return rule2.getPatternLength() - rule1.getPatternLength();
+                }
+            });
+        }
+    }
+
+    private static void parseRules(final Scanner scanner, final String 
location,
+            final Map<Character, List<Rule>> ruleMapping, final Map<Character, 
Character> asciiFoldings) {
+        int currentLine = 0;
+        boolean inMultilineComment = false;
+
+        while (scanner.hasNextLine()) {
+            currentLine++;
+            final String rawLine = scanner.nextLine();
+            String line = rawLine;
+
+            if (inMultilineComment) {
+                if (line.endsWith(MULTILINE_COMMENT_END)) {
+                    inMultilineComment = false;
+                }
+                continue;
+            }
+
+            if (line.startsWith(MULTILINE_COMMENT_START)) {
+                inMultilineComment = true;
+            } else {
+                // discard comments
+                final int cmtI = line.indexOf(COMMENT);
+                if (cmtI >= 0) {
+                    line = line.substring(0, cmtI);
+                }
+
+                // trim leading-trailing whitespace
+                line = line.trim();
+
+                if (line.length() == 0) {
+                    continue; // empty lines can be safely skipped
+                }
+
+                if (line.contains("=")) {
+                    // folding
+                    final String[] parts = line.split("=");
+                    if (parts.length != 2) {
+                        throw new IllegalArgumentException("Malformed folding 
statement split into " + parts.length +
+                                " parts: " + rawLine + " in " + location);
+                    }
+                    final String leftCharacter = parts[0];
+                    final String rightCharacter = parts[1];
+
+                    if (leftCharacter.length() != 1 || rightCharacter.length() 
!= 1) {
+                        throw new IllegalArgumentException("Malformed folding 
statement - " +
+                                "patterns are not single characters: " + 
rawLine + " in " + location);
+                    }
+
+                    asciiFoldings.put(leftCharacter.charAt(0), 
rightCharacter.charAt(0));
+                } else {
+                    // rule
+                    final String[] parts = line.split("\\s+");
+                    if (parts.length != 4) {
+                        throw new IllegalArgumentException("Malformed rule 
statement split into " + parts.length +
+                                " parts: " + rawLine + " in " + location);
+                    }
+                    try {
+                        final String pattern = stripQuotes(parts[0]);
+                        final String replacement1 = stripQuotes(parts[1]);
+                        final String replacement2 = stripQuotes(parts[2]);
+                        final String replacement3 = stripQuotes(parts[3]);
+
+                        final Rule r = new Rule(pattern, replacement1, 
replacement2, replacement3);
+                        final char patternKey = r.pattern.charAt(0);
+                        List<Rule> rules = ruleMapping.get(patternKey);
+                        if (rules == null) {
+                            rules = new ArrayList<>();
+                            ruleMapping.put(patternKey, rules);
+                        }
+                        rules.add(r);
+                    } catch (final IllegalArgumentException e) {
+                        throw new IllegalStateException(
+                                "Problem parsing line '" + currentLine + "' in 
" + location, e);
+                    }
+                }
+            }
+        }
+    }
+
+    private static String stripQuotes(String str) {
+        if (str.startsWith(DOUBLE_QUOTE)) {
+            str = str.substring(1);
+        }
+
+        if (str.endsWith(DOUBLE_QUOTE)) {
+            str = str.substring(0, str.length() - 1);
+        }
+
+        return str;
+    }
+
+    /** Whether to use ASCII folding prior to encoding. */
+    private final boolean folding;
+
+    /**
+     * Creates a new instance with ASCII-folding enabled.
+     */
+    public DaitchMokotoffSoundex() {
+        this(true);
+    }
+
+    /**
+     * Creates a new instance.
+     * <p>
+     * With ASCII-folding enabled, certain accented characters will be 
transformed to equivalent ASCII characters, e.g.
+     * è -&gt; e.
+     * </p>
+     *
+     * @param folding
+     *            if ASCII-folding shall be performed before encoding
+     */
+    public DaitchMokotoffSoundex(final boolean folding) {
+        this.folding = folding;
+    }
+
+    /**
+     * Performs a cleanup of the input string before the actual soundex 
transformation.
+     * <p>
+     * Removes all whitespace characters and performs ASCII folding if enabled.
+     * </p>
+     *
+     * @param input
+     *            the input string to cleanup
+     * @return a cleaned up string
+     */
+    private String cleanup(final String input) {
+        final StringBuilder sb = new StringBuilder();
+        for (char ch : input.toCharArray()) {
+            if (Character.isWhitespace(ch)) {
+                continue;
+            }
+
+            ch = Character.toLowerCase(ch);
+            if (folding && FOLDINGS.containsKey(ch)) {
+                ch = FOLDINGS.get(ch);
+            }
+            sb.append(ch);
+        }
+        return sb.toString();
+    }
+
+    /**
+     * Encodes an Object using the Daitch-Mokotoff soundex algorithm without 
branching.
+     * <p>
+     * This method is provided in order to satisfy the requirements of the 
Encoder interface, and will throw an
+     * EncoderException if the supplied object is not of type java.lang.String.
+     * </p>
+     *
+     * @see #soundex(String)
+     *
+     * @param obj
+     *            Object to encode
+     * @return An object (of type java.lang.String) containing the DM soundex 
code, which corresponds to the String
+     *         supplied.
+     * @throws EncoderException
+     *             if the parameter supplied is not of type java.lang.String
+     * @throws IllegalArgumentException
+     *             if a character is not mapped
+     */
+    @Override
+    public Object encode(final Object obj) throws EncoderException {
+        if (!(obj instanceof String)) {
+            throw new EncoderException(
+                    "Parameter supplied to DaitchMokotoffSoundex encode is not 
of type java.lang.String");
+        }
+        return encode((String) obj);
+    }
+
+    /**
+     * Encodes a String using the Daitch-Mokotoff soundex algorithm without 
branching.
+     *
+     * @see #soundex(String)
+     *
+     * @param source
+     *            A String object to encode
+     * @return A DM Soundex code corresponding to the String supplied
+     * @throws IllegalArgumentException
+     *             if a character is not mapped
+     */
+    @Override
+    public String encode(final String source) {
+        if (source == null) {
+            return null;
+        }
+        return soundex(source, false)[0];
+    }
+
+    /**
+     * Encodes a String using the Daitch-Mokotoff soundex algorithm with 
branching.
+     * <p>
+     * In case a string is encoded into multiple codes (see branching rules), 
the result will contain all codes,
+     * separated by '|'.
+     * </p>
+     * <p>
+     * Example: the name "AUERBACH" is encoded as both
+     * </p>
+     * <ul>
+     * <li>097400</li>
+     * <li>097500</li>
+     * </ul>
+     * <p>
+     * Thus the result will be "097400|097500".
+     * </p>
+     *
+     * @param source
+     *            A String object to encode
+     * @return A string containing a set of DM Soundex codes corresponding to 
the String supplied
+     * @throws IllegalArgumentException
+     *             if a character is not mapped
+     */
+    public String soundex(final String source) {
+        final String[] branches = soundex(source, true);
+        final StringBuilder sb = new StringBuilder();
+        int index = 0;
+        for (final String branch : branches) {
+            sb.append(branch);
+            if (++index < branches.length) {
+                sb.append('|');
+            }
+        }
+        return sb.toString();
+    }
+
+    /**
+     * Perform the actual DM Soundex algorithm on the input string.
+     *
+     * @param source
+     *            A String object to encode
+     * @param branching
+     *            If branching shall be performed
+     * @return A string array containing all DM Soundex codes corresponding to 
the String supplied depending on the
+     *         selected branching mode
+     */
+    private String[] soundex(final String source, final boolean branching) {
+        if (source == null) {
+            return null;
+        }
+
+        final String input = cleanup(source);
+
+        final Set<Branch> currentBranches = new LinkedHashSet<>();
+        currentBranches.add(new Branch());
+
+        char lastChar = '\0';
+        for (int index = 0; index < input.length(); index++) {
+            final char ch = input.charAt(index);
+
+            // ignore whitespace inside a name
+            if (Character.isWhitespace(ch)) {
+                continue;
+            }
+
+            final String inputContext = input.substring(index);
+            final List<Rule> rules = RULES.get(ch);
+            if (rules == null) {
+                continue;
+            }
+
+            // use an EMPTY_LIST to avoid false positive warnings wrt 
potential null pointer access
+            final List<Branch> nextBranches = branching ? new ArrayList<>() : 
Collections.<Branch>emptyList();
+
+            for (final Rule rule : rules) {
+                if (rule.matches(inputContext)) {
+                    if (branching) {
+                        nextBranches.clear();
+                    }
+                    final String[] replacements = 
rule.getReplacements(inputContext, lastChar == '\0');
+                    final boolean branchingRequired = replacements.length > 1 
&& branching;
+
+                    for (final Branch branch : currentBranches) {
+                        for (final String nextReplacement : replacements) {
+                            // if we have multiple replacements, always create 
a new branch
+                            final Branch nextBranch = branchingRequired ? 
branch.createBranch() : branch;
+
+                            // special rule: occurrences of mn or nm are 
treated differently
+                            final boolean force = (lastChar == 'm' && ch == 
'n') || (lastChar == 'n' && ch == 'm');
+
+                            nextBranch.processNextReplacement(nextReplacement, 
force);
+
+                            if (branching) {
+                                nextBranches.add(nextBranch);
+                            } else {
+                                break;
+                            }
+                        }
+                    }
+
+                    if (branching) {
+                        currentBranches.clear();
+                        currentBranches.addAll(nextBranches);
+                    }
+                    index += rule.getPatternLength() - 1;
+                    break;
+                }
+            }
+
+            lastChar = ch;
+        }
+
+        final String[] result = new String[currentBranches.size()];
+        int index = 0;
+        for (final Branch branch : currentBranches) {
+            branch.finish();
+            result[index++] = branch.toString();
+        }
+
+        return result;
+    }
+}
diff --git 
a/src/test/java/org/apache/commons/codec/language/ColognePhoneticTest.java 
b/src/test/java/org/apache/commons/codec/language/ColognePhoneticTest.java
index 737d2c9..76e257a 100644
--- a/src/test/java/org/apache/commons/codec/language/ColognePhoneticTest.java
+++ b/src/test/java/org/apache/commons/codec/language/ColognePhoneticTest.java
@@ -1,252 +1,252 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.commons.codec.language;
-
-import java.util.HashSet;
-import java.util.Locale;
-import java.util.Set;
-
-import org.apache.commons.codec.EncoderException;
-import org.apache.commons.codec.StringEncoderAbstractTest;
-import org.junit.AfterClass;
-import org.junit.Assert;
-import org.junit.Test;
-
-/**
- * Tests the <code>ColognePhonetic</code> class.
- *
- * <p>Keep this file in UTF-8 encoding for proper Javadoc processing.</p>
- *
- */
-public class ColognePhoneticTest extends 
StringEncoderAbstractTest<ColognePhonetic> {
-
-    private static final Set<String> TESTSET = new HashSet<String>();
-
-    private static boolean hasTestCase(String re) {
-        for(String s : TESTSET) {
-            if (s.matches(re)) {
-                return true;
-            }
-        }
-        return false;
-    }
-
-    // Character sequences to be tested by the code
-    private static final String MATCHES[] = {
-            ".*[AEIOUJY].*",         // A, E, I, J, O, U, Y
-            ".*H.*",                 // H
-            ".*B.*",                 // B
-            ".*P[^H].*",             // P not before H
-            ".*[DT][^CSZ].*",        // D,T not before C,S,Z
-            ".*[FVW].*",             // F,V,W
-            ".*PH.*",                // P before H
-            ".*[GKQ].*",             // G,K,Q
-            "C[AHKLOQRUX].*",        // Initial C before A, H, K, L, O, Q, R, 
U, X
-            ".*[^SZ]C[AHKLOQRUX].*", // C before A, H, K, L, O, Q, R, U, X but 
not after S, Z
-            ".*[^CKQ]X.*",           // X not after C,K,Q
-            ".*L.*",                 // L
-            ".*[MN].*",              // M,N
-            ".*R.*",                 // R
-            ".*[SZ].*",              // S,Z
-            ".*[SZ]C.*",             // C after S,Z
-            "C[^AHKLOQRUX].*",       // Initial C except before A, H, K, L, O, 
Q, R, U, X
-            ".+C[^AHKLOQRUX].*",     // C except before A, H, K, L, O, Q, R, 
U, X
-            ".*[DT][CSZ].*",         // D,T before C,S,Z
-            ".*[CKQ]X.*",            // X after C,K,Q
-    };
-
-    @AfterClass
-    // Check that all possible input sequence conditions are represented
-    public static void finishTests() {
-        int errors = 0;
-        for(String m : MATCHES) {
-            if (!hasTestCase(m)) {
-                System.out.println(m + " has no test case");
-                errors++;
-            }
-        }
-        Assert.assertEquals("Not expecting any missing test cases", 0, errors);
-    }
-
-    @Override
-    // Capture test strings for later checking
-    public void checkEncoding(String expected, String source) throws 
EncoderException {
-        // Note that the German letter Eszett is converted to SS by 
toUpperCase, so we don't need to replace it
-        TESTSET.add(source.toUpperCase(Locale.GERMAN).replace('Ä', 
'A').replace('Ö', 'O').replace('Ü', 'U'));
-        super.checkEncoding(expected, source);
-    }
-
-    @Override
-    protected ColognePhonetic createStringEncoder() {
-        return new ColognePhonetic();
-    }
-
-    @Test(expected=org.junit.ComparisonFailure.class)
-    // Ensure that override still allows tests to work
-    public void testCanFail() throws EncoderException {
-        this.checkEncoding("/", "Fehler");
-    }
-
-    @Test
-    public void testAabjoe() throws EncoderException {
-        this.checkEncoding("01", "Aabjoe");
-    }
-
-    @Test
-    public void testAaclan() throws EncoderException {
-        this.checkEncoding("0856", "Aaclan");
-    }
-
-    /**
-     * Tests [CODEC-122]
-     *
-     * @throws EncoderException
-     */
-    @Test
-    public void testAychlmajrForCodec122() throws EncoderException {
-        this.checkEncoding("04567", "Aychlmajr");
-    }
-
-    @Test
-    public void testEdgeCases() throws EncoderException {
-        final String[][] data = {
-            {"a", "0"},
-            {"e", "0"},
-            {"i", "0"},
-            {"o", "0"},
-            {"u", "0"},
-            {"\u00E4", "0"}, // a-umlaut
-            {"\u00F6", "0"}, // o-umlaut
-            {"\u00FC", "0"}, // u-umlaut
-            {"\u00DF", "8"}, // small sharp s
-            {"aa", "0"},
-            {"ha", "0"},
-            {"h", ""},
-            {"aha", "0"},
-            {"b", "1"},
-            {"p", "1"},
-            {"ph", "3"},
-            {"f", "3"},
-            {"v", "3"},
-            {"w", "3"},
-            {"g", "4"},
-            {"k", "4"},
-            {"q", "4"},
-            {"x", "48"},
-            {"ax", "048"},
-            {"cx", "48"},
-            {"l", "5"},
-            {"cl", "45"},
-            {"acl", "085"},
-            {"mn", "6"},
-            {"{mn}","6"}, // test chars above Z
-            {"r", "7"}};
-        this.checkEncodings(data);
-    }
-
-    @Test
-    public void testExamples() throws EncoderException {
-        final String[][] data = {
-            {"m\u00DCller", "657"}, // mÜller - why upper case U-umlaut?
-            {"m\u00FCller", "657"}, // müller - add equivalent lower-case
-            {"schmidt", "862"},
-            {"schneider", "8627"},
-            {"fischer", "387"},
-            {"weber", "317"},
-            {"wagner", "3467"},
-            {"becker", "147"},
-            {"hoffmann", "0366"},
-            {"sch\u00C4fer", "837"}, // schÄfer - why upper case A-umlaut ?
-            {"sch\u00e4fer", "837"}, // schäfer - add equivalent lower-case
-            {"Breschnew", "17863"},
-            {"Wikipedia", "3412"},
-            {"peter", "127"},
-            {"pharma", "376"},
-            {"m\u00f6nchengladbach", "664645214"}, // mönchengladbach
-            {"deutsch", "28"},
-            {"deutz", "28"},
-            {"hamburg", "06174"},
-            {"hannover", "0637"},
-            {"christstollen", "478256"},
-            {"Xanthippe", "48621"},
-            {"Zacharias", "8478"},
-            {"Holzbau", "0581"},
-            {"matsch", "68"},
-            {"matz", "68"},
-            {"Arbeitsamt", "071862"},
-            {"Eberhard", "01772"},
-            {"Eberhardt", "01772"},
-            {"Celsius", "8588"},
-            {"Ace", "08"},
-            {"heithabu", "021"}};
-        this.checkEncodings(data);
-    }
-
-    @Test
-    public void testHyphen() throws EncoderException {
-        final String[][] data = {{"bergisch-gladbach", "174845214"},
-                {"M\u00fcller-L\u00fcdenscheidt", "65752682"}}; // 
Müller-Lüdenscheidt
-        this.checkEncodings(data);
-    }
-
-    @Test
-    public void testIsEncodeEquals() {
-        //@formatter:off
-        final String[][] data = {
-            {"Muller", "M\u00fcller"}, // Müller
-            {"Meyer", "Mayr"},
-            {"house", "house"},
-            {"House", "house"},
-            {"Haus", "house"},
-            {"ganz", "Gans"},
-            {"ganz", "G\u00e4nse"}, // Gänse
-            {"Miyagi", "Miyako"}};
-        //@formatter:on
-        for (final String[] element : data) {
-            final boolean encodeEqual = 
this.getStringEncoder().isEncodeEqual(element[1], element[0]);
-            Assert.assertTrue(element[1] + " != " + element[0], encodeEqual);
-        }
-    }
-
-    @Test
-    public void testVariationsMella() throws EncoderException {
-        final String data[] = {"mella", "milah", "moulla", "mellah", "muehle", 
"mule"};
-        this.checkEncodingVariations("65", data);
-    }
-
-    @Test
-    public void testVariationsMeyer() throws EncoderException {
-        final String data[] = {"Meier", "Maier", "Mair", "Meyer", "Meyr", 
"Mejer", "Major"};
-        this.checkEncodingVariations("67", data);
-    }
-
-    @Test
-    public void testSpecialCharsBetweenSameLetters() throws EncoderException {
-        final String data[] = {"Test test", "Testtest", "Test-test", 
"TesT#Test", "TesT?test"};
-        this.checkEncodingVariations("28282", data);
-    }
-
-    // Allow command-line testing
-    public static void main(String args[]) {
-        ColognePhonetic coder = new ColognePhonetic();
-        for(String arg : args) {
-            String code = coder.encode(arg);
-            System.out.println("'" + arg + "' = '" + code + "'");
-        }
-    }
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.commons.codec.language;
+
+import java.util.HashSet;
+import java.util.Locale;
+import java.util.Set;
+
+import org.apache.commons.codec.EncoderException;
+import org.apache.commons.codec.StringEncoderAbstractTest;
+import org.junit.AfterClass;
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * Tests the <code>ColognePhonetic</code> class.
+ *
+ * <p>Keep this file in UTF-8 encoding for proper Javadoc processing.</p>
+ *
+ */
+public class ColognePhoneticTest extends 
StringEncoderAbstractTest<ColognePhonetic> {
+
+    private static final Set<String> TESTSET = new HashSet<>();
+
+    private static boolean hasTestCase(String re) {
+        for(String s : TESTSET) {
+            if (s.matches(re)) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    // Character sequences to be tested by the code
+    private static final String MATCHES[] = {
+            ".*[AEIOUJY].*",         // A, E, I, J, O, U, Y
+            ".*H.*",                 // H
+            ".*B.*",                 // B
+            ".*P[^H].*",             // P not before H
+            ".*[DT][^CSZ].*",        // D,T not before C,S,Z
+            ".*[FVW].*",             // F,V,W
+            ".*PH.*",                // P before H
+            ".*[GKQ].*",             // G,K,Q
+            "C[AHKLOQRUX].*",        // Initial C before A, H, K, L, O, Q, R, 
U, X
+            ".*[^SZ]C[AHKLOQRUX].*", // C before A, H, K, L, O, Q, R, U, X but 
not after S, Z
+            ".*[^CKQ]X.*",           // X not after C,K,Q
+            ".*L.*",                 // L
+            ".*[MN].*",              // M,N
+            ".*R.*",                 // R
+            ".*[SZ].*",              // S,Z
+            ".*[SZ]C.*",             // C after S,Z
+            "C[^AHKLOQRUX].*",       // Initial C except before A, H, K, L, O, 
Q, R, U, X
+            ".+C[^AHKLOQRUX].*",     // C except before A, H, K, L, O, Q, R, 
U, X
+            ".*[DT][CSZ].*",         // D,T before C,S,Z
+            ".*[CKQ]X.*",            // X after C,K,Q
+    };
+
+    @AfterClass
+    // Check that all possible input sequence conditions are represented
+    public static void finishTests() {
+        int errors = 0;
+        for(String m : MATCHES) {
+            if (!hasTestCase(m)) {
+                System.out.println(m + " has no test case");
+                errors++;
+            }
+        }
+        Assert.assertEquals("Not expecting any missing test cases", 0, errors);
+    }
+
+    @Override
+    // Capture test strings for later checking
+    public void checkEncoding(String expected, String source) throws 
EncoderException {
+        // Note that the German letter Eszett is converted to SS by 
toUpperCase, so we don't need to replace it
+        TESTSET.add(source.toUpperCase(Locale.GERMAN).replace('Ä', 
'A').replace('Ö', 'O').replace('Ü', 'U'));
+        super.checkEncoding(expected, source);
+    }
+
+    @Override
+    protected ColognePhonetic createStringEncoder() {
+        return new ColognePhonetic();
+    }
+
+    @Test(expected=org.junit.ComparisonFailure.class)
+    // Ensure that override still allows tests to work
+    public void testCanFail() throws EncoderException {
+        this.checkEncoding("/", "Fehler");
+    }
+
+    @Test
+    public void testAabjoe() throws EncoderException {
+        this.checkEncoding("01", "Aabjoe");
+    }
+
+    @Test
+    public void testAaclan() throws EncoderException {
+        this.checkEncoding("0856", "Aaclan");
+    }
+
+    /**
+     * Tests [CODEC-122]
+     *
+     * @throws EncoderException
+     */
+    @Test
+    public void testAychlmajrForCodec122() throws EncoderException {
+        this.checkEncoding("04567", "Aychlmajr");
+    }
+
+    @Test
+    public void testEdgeCases() throws EncoderException {
+        final String[][] data = {
+            {"a", "0"},
+            {"e", "0"},
+            {"i", "0"},
+            {"o", "0"},
+            {"u", "0"},
+            {"\u00E4", "0"}, // a-umlaut
+            {"\u00F6", "0"}, // o-umlaut
+            {"\u00FC", "0"}, // u-umlaut
+            {"\u00DF", "8"}, // small sharp s
+            {"aa", "0"},
+            {"ha", "0"},
+            {"h", ""},
+            {"aha", "0"},
+            {"b", "1"},
+            {"p", "1"},
+            {"ph", "3"},
+            {"f", "3"},
+            {"v", "3"},
+            {"w", "3"},
+            {"g", "4"},
+            {"k", "4"},
+            {"q", "4"},
+            {"x", "48"},
+            {"ax", "048"},
+            {"cx", "48"},
+            {"l", "5"},
+            {"cl", "45"},
+            {"acl", "085"},
+            {"mn", "6"},
+            {"{mn}","6"}, // test chars above Z
+            {"r", "7"}};
+        this.checkEncodings(data);
+    }
+
+    @Test
+    public void testExamples() throws EncoderException {
+        final String[][] data = {
+            {"m\u00DCller", "657"}, // mÜller - why upper case U-umlaut?
+            {"m\u00FCller", "657"}, // müller - add equivalent lower-case
+            {"schmidt", "862"},
+            {"schneider", "8627"},
+            {"fischer", "387"},
+            {"weber", "317"},
+            {"wagner", "3467"},
+            {"becker", "147"},
+            {"hoffmann", "0366"},
+            {"sch\u00C4fer", "837"}, // schÄfer - why upper case A-umlaut ?
+            {"sch\u00e4fer", "837"}, // schäfer - add equivalent lower-case
+            {"Breschnew", "17863"},
+            {"Wikipedia", "3412"},
+            {"peter", "127"},
+            {"pharma", "376"},
+            {"m\u00f6nchengladbach", "664645214"}, // mönchengladbach
+            {"deutsch", "28"},
+            {"deutz", "28"},
+            {"hamburg", "06174"},
+            {"hannover", "0637"},
+            {"christstollen", "478256"},
+            {"Xanthippe", "48621"},
+            {"Zacharias", "8478"},
+            {"Holzbau", "0581"},
+            {"matsch", "68"},
+            {"matz", "68"},
+            {"Arbeitsamt", "071862"},
+            {"Eberhard", "01772"},
+            {"Eberhardt", "01772"},
+            {"Celsius", "8588"},
+            {"Ace", "08"},
+            {"heithabu", "021"}};
+        this.checkEncodings(data);
+    }
+
+    @Test
+    public void testHyphen() throws EncoderException {
+        final String[][] data = {{"bergisch-gladbach", "174845214"},
+                {"M\u00fcller-L\u00fcdenscheidt", "65752682"}}; // 
Müller-Lüdenscheidt
+        this.checkEncodings(data);
+    }
+
+    @Test
+    public void testIsEncodeEquals() {
+        //@formatter:off
+        final String[][] data = {
+            {"Muller", "M\u00fcller"}, // Müller
+            {"Meyer", "Mayr"},
+            {"house", "house"},
+            {"House", "house"},
+            {"Haus", "house"},
+            {"ganz", "Gans"},
+            {"ganz", "G\u00e4nse"}, // Gänse
+            {"Miyagi", "Miyako"}};
+        //@formatter:on
+        for (final String[] element : data) {
+            final boolean encodeEqual = 
this.getStringEncoder().isEncodeEqual(element[1], element[0]);
+            Assert.assertTrue(element[1] + " != " + element[0], encodeEqual);
+        }
+    }
+
+    @Test
+    public void testVariationsMella() throws EncoderException {
+        final String data[] = {"mella", "milah", "moulla", "mellah", "muehle", 
"mule"};
+        this.checkEncodingVariations("65", data);
+    }
+
+    @Test
+    public void testVariationsMeyer() throws EncoderException {
+        final String data[] = {"Meier", "Maier", "Mair", "Meyer", "Meyr", 
"Mejer", "Major"};
+        this.checkEncodingVariations("67", data);
+    }
+
+    @Test
+    public void testSpecialCharsBetweenSameLetters() throws EncoderException {
+        final String data[] = {"Test test", "Testtest", "Test-test", 
"TesT#Test", "TesT?test"};
+        this.checkEncodingVariations("28282", data);
+    }
+
+    // Allow command-line testing
+    public static void main(String args[]) {
+        ColognePhonetic coder = new ColognePhonetic();
+        for(String arg : args) {
+            String code = coder.encode(arg);
+            System.out.println("'" + arg + "' = '" + code + "'");
+        }
+    }
+}

Reply via email to