This is an automated email from the ASF dual-hosted git repository.
ggregory pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-codec.git
The following commit(s) were added to refs/heads/master by this push:
new 7b2ab4a [CODEC-257] Update from Java 7 to Java 8.
7b2ab4a is described below
commit 7b2ab4a2659b987b823c7cb0a163c766557da802
Author: Gary Gregory <[email protected]>
AuthorDate: Fri Mar 22 13:18:04 2019 -0400
[CODEC-257] Update from Java 7 to Java 8.
---
.travis.yml | 1 -
pom.xml | 4 +-
src/changes/changes.xml | 970 ++++++++---------
.../codec/language/DaitchMokotoffSoundex.java | 1106 ++++++++++----------
.../codec/language/ColognePhoneticTest.java | 504 ++++-----
5 files changed, 1292 insertions(+), 1293 deletions(-)
diff --git a/.travis.yml b/.travis.yml
index 4dcedf4..9059fff 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -17,7 +17,6 @@ language: java
sudo: false
jdk:
- - openjdk7
- oraclejdk8
- oraclejdk9
- oraclejdk11
diff --git a/pom.xml b/pom.xml
index ed89be0..93f308f 100644
--- a/pom.xml
+++ b/pom.xml
@@ -224,8 +224,8 @@ limitations under the License.
</dependency>
</dependencies>
<properties>
- <maven.compiler.source>1.7</maven.compiler.source>
- <maven.compiler.target>1.7</maven.compiler.target>
+ <maven.compiler.source>1.8</maven.compiler.source>
+ <maven.compiler.target>1.8</maven.compiler.target>
<commons.componentid>codec</commons.componentid>
<commons.module.name>org.apache.commons.codec</commons.module.name>
<commons.jira.id>CODEC</commons.jira.id>
diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index 2d763d6..18a6cf4 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -1,485 +1,485 @@
-<?xml version="1.0"?>
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-<!-- $Id$ -->
-
-<!--
-This file is also used by the maven-changes-plugin to generate the release
notes.
-Useful ways of finding items to add to this file are:
-
-1. Add items when you fix a bug or add a feature (this makes the
-release process easy :-).
-
-2. Do a JIRA search for tickets closed since the previous release.
-
-3. Use the report generated by the maven-changelog-plugin to see all
-SVN commits. TBA how to use this with SVN.
-
-To generate the release notes from this file:
-
-mvn changes:announcement-generate -Prelease-notes [-Dchanges.version=m.n]
-
-The <action> type attribute can be add,update,fix,remove.
--->
-
-<document>
- <properties>
- <title>Changes</title>
- <author>Apache Commons Developers</author>
- </properties>
- <body>
-
- <release version="1.13" date="YYYY-MM-DD" description="TBD">
- <!--Nothing yet -->
- </release>
-
- <release version="1.12" date="2019-02-04" description="Feature and fix
release.">
- <!-- The first attribute below should be the issue id; makes it easier
to navigate in the IDE outline -->
- <action issue="CODEC-252" dev="chtompki" type="fix">B64 salt generator:
Random -> ThreadLocalRandom</action>
- <action issue="CODEC-250" dev="sebb" type="fix" due-to="Alex
Volodko">Wrong value calculated by Cologne Phonetic if a special character is
placed between equal letters</action>
- <action issue="CODEC-244" dev="ggregory" type="update">Update from Java
6 to Java 7</action>
- <action issue="CODEC-240" dev="ggregory" type="add" due-to="Ioannis
Sermetziadis">Add Percent-Encoding Codec (described in RFC3986 and
RFC7578)</action>
- <action issue="CODEC-246" dev="ggregory" type="fix" due-to="Oscar Luis
Vera Pérez">ColognePhoneticTest.testIsEncodeEquals missing assertions</action>
- <action issue="CODEC-251" dev="ggregory" type="add" due-to="Gary
Gregory">Add SHA-3 methods in DigestUtils</action>
- </release>
- <release version="1.11" date="2017-10-20" description="Feature and fix
release.">
- <!-- The first attribute below should be the issue id; makes it easier
to navigate in the IDE outline -->
- <action issue="CODEC-241" type="add">Add support for XXHash32</action>
- <action issue="CODEC-234" dev="ggregory" type="update"
due-to="Christopher Schultz, Sebb">Base32.decode should support lowercase
letters</action>
- <action issue="CODEC-233" dev="sebb" type="update" due-to="Yossi
Tamari">Soundex should support more algorithm variants</action>
- <action issue="CODEC-145" dev="sebb" type="fix" due-to="Jesse
Glick">Base64.encodeBase64String could better use newStringUsAscii (ditto
encodeBase64URLSafeString)</action>
- <action issue="CODEC-144" dev="sebb" type="fix">BaseNCodec:
encodeToString and encodeAsString methods are identical</action>
- <action issue="CODEC-232" dev="sebb" type="fix">URLCodec is neither
immutable nor threadsafe</action>
- <action issue="CODEC-231" dev="sebb"
type="fix">StringUtils.equals(CharSequence cs1, CharSequence cs2) can fail with
String Index OBE</action>
- <action issue="CODEC-230" dev="sebb" type="fix">URLCodec.WWW_FORM_URL
should be private</action>
- <action issue="CODEC-229" dev="sebb"
type="fix">StringUtils.newStringxxx(null) should return null, not NPE</action>
- <action issue="CODEC-220" dev="sebb" type="add">Fluent interface for
DigestUtils</action>
- <action issue="CODEC-222" dev="sebb" type="add">Fluent interface for
HmacUtils</action>
- <action issue="CODEC-225" dev="jochen" type="fix" due-to="Svetlin
Zarev">Fix minor resource leaks</action>
- <action issue="CODEC-223" dev="sebb" type="remove">Drop obsolete Ant
build</action>
- <action issue="CODEC-171" dev="sebb" type="add" due-to="Brett Okken">Add
support for CRC32-C</action>
- <action issue="CODEC-221" dev="sebb" type="update">HmacUtils.updateHmac
calls reset() unnecessarily</action>
- <action issue="CODEC-200" dev="sebb" type="fix" due-to="Luciano
Vernaschi">Base32.HEX_DECODE_TABLE contains the wrong value 32</action>
- <action issue="CODEC-207" dev="ggregory" type="fix" due-to="Gary
Gregory">Charsets Javadoc breaks build when using Java 8</action>
- <action issue="CODEC-199" dev="ggregory/sebb" type="fix" due-to="Yossi
Tamari">Bug in HW rule in Soundex</action>
- <action issue="CODEC-209" dev="ggregory" type="fix" due-to="Gary
Gregory">Javadoc for SHA-224 DigestUtils methods should mention Java 1.8.0
restriction instead of 1.4.0.</action>
- <action issue="CODEC-219" dev="ggregory" type="fix" due-to="Gary
Gregory, Sebb">Don't deprecate Charsets Charset constants in favor of Java 7's
java.nio.charset.StandardCharsets</action>
- <action issue="CODEC-217" dev="ggregory" type="add" due-to="Gary
Gregory">Add HmacAlgorithms.HMAC_SHA_224 (Java 8 only)</action>
- <action issue="CODEC-213" dev="ggregory" type="add" due-to="Gary
Gregory">Support JEP 287: SHA-3 Hash Algorithms</action>
- <action issue="CODEC-212" dev="ggregory" type="add" due-to="Gary
Gregory">Create a minimal Digest command line utility:
org.apache.commons.codec.digest.Digest</action>
- <action issue="CODEC-210" dev="ggregory" type="add" due-to="Gary
Gregory">Add DigestUtils.getDigest(String, MessageDigest)</action>
- <action issue="CODEC-208" dev="ggregory" type="add" due-to="Gary
Gregory">Make some DigestUtils APIs public</action>
- <action issue="CODEC-206" dev="ggregory" type="add" due-to="Gary
Gregory">Add java.io.File APIs to MessageDigestAlgorithm</action>
- <action issue="CODEC-183" dev="ggregory" type="add" due-to="Steven
Wurster">BaseNCodecOutputStream only supports writing EOF on close()</action>
- <action issue="CODEC-195" dev="ggregory" type="add" due-to="Gary
Gregory">Support SHA-224 in DigestUtils on Java 8</action>
- <action issue="CODEC-194" dev="ggregory" type="add" due-to="Gary
Gregory">Support java.nio.ByteBuffer in
org.apache.commons.codec.binary.Hex</action>
- <action issue="CODEC-193" dev="ggregory" type="add" due-to="Michael
Donaghy">Support java.nio.ByteBuffer in DigestUtils</action>
- <action issue="CODEC-202" dev="ggregory" type="add" due-to="Oleg
Kalnichevski">Add BaseNCodec.encode(byte[], int, int) input with offset and
length parameters for Base64 and Base32.</action>
- <action issue="CODEC-203" dev="ggregory" type="add" due-to="Gary
Gregory">Add convenience method decodeHex(String).</action>
- <action issue="CODEC-205" dev="ggregory" type="add" due-to="Gary
Gregory">Add faster CRC32 implementation.</action>
- <action issue="CODEC-224" dev="ggregory" type="add" due-to="Gary
Gregory">Add convenience API
org.apache.commons.codec.binary.Hex.encodeHexString(byte[]|ByteBuffer,
boolean).</action>
- <action issue="CODEC-242" dev="ggregory" type="add" due-to="Gary
Gregory">Add Automatic-Module-Name manifest entry for Java 9.</action>
- </release>
- <release version="1.10" date="5 November 2014" description="Feature and
fix release.">
- <action dev="ggregory" type="add" issue="CODEC-192" due-to="Thomas
Neidhart">Add Daitch-Mokotoff Soundex</action>
- <action dev="ggregory" type="add" issue="CODEC-121" due-to="Thomas
Neidhart, Java John">QuotedPrintableCodec does not support soft line break per
the 'quoted-printable' example on Wikipedia</action>
- <action dev="tn" type="fix" issue="CODEC-185" due-to="Sean Busbey">Added
clarification to Javadoc of Base64 concerning the use of the urlSafe
parameter</action>
- <action dev="tn" type="fix" issue="CODEC-191" due-to="Igor Savin">Added
clarification to the Javadoc of Base[32|64]OutputStream that it is mandatory to
call close()</action>
- <action dev="ggregory" type="fix" issue="CODEC-188" due-to="Hendrik
Saly">Add support for HMAC Message Authentication Code (MAC) digests</action>
- <action dev="ggregory" type="fix" issue="CODEC-187" due-to="Michael
Tobias, Thomas Neidhart">Beider Morse Phonetic Matching producing incorrect
tokens</action>
- <action dev="ggregory" type="fix" issue="CODEC-184" due-to="Cyrille
Artho">NullPointerException in DoubleMetaPhone.isDoubleMetaphoneEqual when
using empty strings</action>
- <action dev="ggregory" type="add" issue="CODEC-181" due-to="Ivan
Martinez-Ortiz">Make possible to provide padding byte to BaseNCodec in
constructor</action>
- <action dev="ggregory" type="fix" issue="CODEC-180" due-to="Ville
Skyttä">Fix Javadoc 1.8.0 errors</action>
- <action dev="ggregory" type="update" issue="CODEC-178">Deprecate
Charsets Charset constants in favor of Java 7's
java.nio.charset.StandardCharsets</action>
- <action dev="ggregory" type="fix" issue="CODEC-189">Fix Java 8 build
Javadoc errors</action>
- <action dev="ggregory" type="update" issue="CODEC-190">Update from
commons-parent 34 to 35</action>
- </release>
- <release version="1.9" date="20 December 2013" description="Feature and
fix release.">
- <action dev="ggregory" type="update" issue="CODEC-174" due-to="Thomas
Champagne">Improve performance of Beider Morse encoder</action>
- <action dev="ggregory" type="fix" issue="CODEC-175">Beider Morse does
not close Scanners used to read config files</action>
- <action dev="sebb" type="fix" issue="CODEC-172" due-to="Matt
Bishop">Base32 decode table has spurious value</action>
- <action dev="ggregory" type="fix" issue="CODEC-170" due-to="Ron Wheeler,
Henri Yandell">Link broken in Metaphone Javadoc</action>
- <action dev="ggregory" type="fix" issue="CODEC-176" due-to="Ville
Skyttä">Spelling fixes in Javadoc and comments</action>
- </release>
- <release version="1.8" date="19 April 2013" description="Feature and fix
release. Requires a minimum of Java 1.6.">
- <action dev="ggregory" type="add" issue="CODEC-168" due-to="Daniel
Cassidy">Add DigestUtils.updateDigest(MessageDigest, InputStream).</action>
- <action dev="julius" type="add" issue="CODEC-167">Add JUnit to test our
decode with pad character in the middle.</action>
- <action dev="ggregory" type="add" issue="CODEC-161" due-to="crice">Add
Match Rating Approach (MRA) phonetic algorithm encoder.</action>
- <action dev="ggregory" type="fix" issue="CODEC-163"
due-to="leo141">ColognePhonetic encoder unnecessarily creates many char arrays
on every loop run.</action>
- <action dev="sebb" type="fix"
issue="CODEC-160">Base64.encodeBase64URLSafeString doesn't add padding
characters at the end.</action>
- </release>
- <release version="1.7" date="11 September 2012" description="Feature and
fix release. Requires a minimum of Java 1.6.">
- <action issue="CODEC-157" dev="ggregory" type="add" due-to="ggregory">
- DigestUtils: Add MD2 APIs.
- </action>
- <action issue="CODEC-156" dev="ggregory" type="add" due-to="ggregory">
- DigestUtils: add APIs named after standard algorithm name SHA-1.
- </action>
- <action issue="CODEC-155" dev="ggregory" type="add" due-to="ggregory">
- DigestUtils.getDigest(String) should throw IllegalArgumentException
instead of RuntimeException.
- </action>
- <action issue="CODEC-153" dev="ggregory" type="add" due-to="ggregory">
- Create a class MessageDigestAlgorithms to define standard algorithm
names.
- </action>
- <action issue="CODEC-152" dev="ggregory" type="add" due-to="ggregory">
- DigestUtils.getDigest(String) loses the original exception.
- </action>
- <action issue="CODEC-151" dev="ggregory" type="add" due-to="lathspell">
- Remove unnecessary attempt to fill up the salt variable in UnixCrypt.
- </action>
- <action issue="CODEC-150" dev="ggregory" type="add" due-to="lathspell">
- Remove unnecessary call to Math.abs().
- </action>
- <action issue="CODEC-148" dev="ggregory" type="add" due-to="lathspell">
- More tests and minor things.
- </action>
- <action issue="CODEC-146" dev="tn" type="add" due-to="Julius Davies">
- Added regression tests for PhoneticEngine based on Solr-3.6.0.
- </action>
- <action issue="CODEC-147" dev="tn" type="update">
- BeiderMorseEncoder/PhoneticEngine: make results deterministic by using
a LinkedHashSet
- instead of a HashSet.
- </action>
- <action issue="CODEC-143" dev="sebb" type="update">
- StringBuffer could be replaced by StringBuilder for local variables.
- </action>
- <action issue="CODEC-139" dev="ggregory" type="add" due-to="dsebastien">
- DigestUtils: add updateDigest methods and make methods public.
- </action>
- <action issue="CODEC-133" dev="ggregory" type="add" due-to="lathspell">
- Add classes for MD5/SHA1/SHA-512-based Unix crypt(3) hash variants.
- </action>
- <action issue="CODEC-96" dev="ggregory" type="fix" due-to="sebb">
- Base64 encode() method is no longer thread-safe, breaking clients
using it as a shared BinaryEncoder.
- Note: the fix breaks binary compatibility, however the changes are to
a class (BaseNCodec) which is
- intended for internal use.
- </action>
- <action issue="CODEC-138" dev="sebb" type="fix">
- Complete FilterInputStream interface for BaseNCodecInputStream.
- </action>
- <action issue="CODEC-136" dev="ggregory" type="fix">
- Use Charset objects when possible, create Charsets for required
character encodings.
- </action>
- <action issue="CODEC-132" dev="ggregory" type="fix" due-to="rcmuir">
- BeiderMorseEncoder OOM issues.
- </action>
- <action issue="CODEC-131" dev="tn" type="fix" due-to="smolav">
- DoubleMetaphone Javadoc contains dead links.
- </action>
- <action issue="CODEC-130" dev="ggregory" type="add" due-to="tn">
- Base64InputStream.skip skips underlying stream, not output.
- </action>
- <action issue="CODEC-63" dev="ggregory" type="add" due-to="tn">
- Implement NYSIIS phonetic encoder.
- </action>
- </release>
- <release version="1.6" date="20 November 2011" description="Feature and
fix release. Requires a minimum of Java 1.5.">
- <action dev="ggregory" type="fix" issue="CODEC-129" due-to="ggregory">
- Use standard Maven directory layout.
- </action>
- <action dev="ggregory" type="fix" issue="CODEC-128"
due-to="[email protected]">
- Documentation spelling fixes.
- </action>
- <action dev="ggregory, sebb" type="fix" issue="CODEC-127">
- Fix various character encoding issues in comments and test cases.
- </action>
- <action dev="ggregory, matthewpocock" type="update" issue="CODEC-125"
due-to="Matthew Pocock">
- Implement a Beider-Morse phonetic matching codec.
- </action>
- <action dev="ggregory" type="update" issue="CODEC-119">
- Migrate to Java 5.
- </action>
- <action dev="ggregory" type="update" issue="CODEC-120">
- Migrate to JUnit 4.
- </action>
- <action dev="ggregory" type="fix" issue="CODEC-123">
- ColognePhonetic Javadoc should use HTML entities for special
characters.
- </action>
- </release>
- <release version="1.5" date="29 March 2011" description="Feature and fix
release. Requires a minimum of Java 1.4.">
- <action dev="sebb" type="add" issue="CODEC-88">
- Added new Base32 encoder.
- </action>
- <action dev="sebb" type="fix" issue="CODEC-89">
- new Base64().encode() appends a CRLF, and chunks results into 76
character lines.
- </action>
- <action dev="sebb" type="fix" issue="CODEC-92">
- Many test cases use getBytes() which uses the default platform
encoding so tests may fail on some platforms.
- </action>
- <action dev="sebb, julius, ggregory" type="add" issue="CODEC-93"
due-to="sebb">
- Add test(s) to check that encodeBase64() does not chunk output.
- </action>
- <action dev="sebb" type="fix" issue="CODEC-97" due-to="mjryall">
- Base64 default constructor behaviour changed to enable chunking in 1.4.
- </action>
- <action dev="ggregory" type="fix" issue="CODEC-99" due-to="julius">
- Base64.encodeBase64String() shouldn't chunk.
- </action>
- <action dev="julius" type="fix" issue="CODEC-101" due-to="balusc">
- Base64InputStream#read(byte[]) incorrectly returns 0 at end of any
stream which is multiple of 3 bytes long.
- </action>
- <action dev="bayard" type="fix" issue="CODEC-103" due-to="gnuf">
- Typo in DecoderException message thrown from Hex.decodeHex.
- </action>
- <action dev="julius, ggregory" type="add" issue="CODEC-105" due-to="zak">
- ArrayIndexOutOfBoundsException when doing multiple reads() on encoding
Base64InputStream.
- </action>
- <action dev="bayard" type="add" issue="CODEC-106" due-to="it2mmeyerfa">
- Add the "Kölner Phonetik" encoder (Cologne Phonetic) to codec.lang.
- </action>
- <action dev="ggregory" type="add" issue="CODEC-111" due-to="ggregory">
- org.apache.commons.codec.net.URLCodec.ESCAPE_CHAR isn't final but
should be.
- </action>
- <action dev="sebb" type="add" issue="CODEC-112" due-to="sebb">
- Base64.encodeBase64(byte[] binaryData, boolean isChunked, boolean
urlSafe, int maxResultSize) throws IAE for valid maxResultSize if isChunked is
false.
- </action>
- <action dev="ggregory" type="add" issue="CODEC-113" due-to="ggregory">
- org.apache.commons.codec.language.RefinedSoundex.US_ENGLISH_MAPPING
should be package protected MALICIOUS_CODE.
- </action>
- <action dev="ggregory" type="add" issue="CODEC-114" due-to="ggregory">
- org.apache.commons.codec.language.Soundex.US_ENGLISH_MAPPING should be
package protected MALICIOUS_CODE.
- </action>
- <action dev="ggregory" type="add" issue="CODEC-115" due-to="sebb">
- DoubleMetaphone.maxCodeLen should probably be private.
- </action>
- <action dev="ggregory" type="remove" issue="CODEC-116" due-to="ggregory">
- Remove deprecated package private method
Base64.discardWhitespace(byte[])
- </action>
- <action dev="ggregory" type="fix" issue="CODEC-117" due-to="ggregory">
- Caverphone encodes names starting and ending with "mb" incorrectly.
- </action>
- <action dev="ggregory" type="add" issue="CODEC-118" due-to="ggregory">
- Split Caverphone class into two classes for Caverphone 1.0 and 2.0.
- </action>
- </release>
- <release version="1.4" date="9 August 2009" description="Feature and fix
release. Requires a minimum of Java 1.4.">
- <action dev="ggregory" type="fix" issue="CODEC-80" due-to="Julius
Davies">
- Regression: Base64.encode(chunk=true) has bug when input length is
multiple of 76.
- </action>
- <action dev="ggregory" type="fix" issue="CODEC-77" due-to="Julius
Davies">
- Base64 bug with empty input (new byte[0]).
- </action>
- <action dev="ggregory" type="fix" issue="CODEC-75" due-to="Julius
Davies">
- Make Base64 URL-safe.
- </action>
- <action dev="ggregory" type="add" issue="CODEC-74">
- Allow for uppercase letters output in Hex.encodeHex().
- </action>
- <action dev="bayard" type="fix" issue="CODEC-72" due-to="Sebb">
- Soundex and RefinedSoundex issues with character arrays.
- </action>
- <action dev="bayard" type="fix" issue="CODEC-71" due-to="Sebb">
- Base64.isArrayByteBase64() method is inefficient for large byte arrays.
- </action>
- <action dev="bayard" type="fix" issue="CODEC-70" due-to="Sebb">
- Thread safety and malicious code safety improvements.
- </action>
- <action dev="bayard" type="fix" issue="CODEC-68" due-to="Robert
Rodewald">
- isBase64 throws ArrayIndexOutOfBoundsException on some non-BASE64
bytes.
- </action>
- <action dev="bayard" type="fix" issue="CODEC-65" due-to="Benjamin
Bentmann">
- Fix case-insensitive string handling.
- </action>
- <action dev="ggregory" type="fix" issue="CODEC-61" due-to="Igor
Slepchin">
- Base64.encodeBase64() throws NegativeArraySizeException on large files.
- </action>
- <action dev="bayard" type="add" issue="CODEC-60">
- Implement Caverphone.
- </action>
- <action dev="ggregory" type="fix" issue="CODEC-58" due-to="Julius
Davies">
- Character set used by Base64 not documented.
- </action>
- <action dev="bayard" type="fix" issue="CODEC-56" due-to="Sebb">
- RefinedSoundex creates instance before all fields have been initialized.
- </action>
- <action dev="bayard" type="add" issue="CODEC-52" due-to="Niklas
Gustavsson">
- Digest on InputStreams.
- </action>
- <action dev="bayard" type="fix" issue="CODEC-51">
- 2 Test failures in SoundexTest.
- </action>
- <action dev="bayard" type="add" issue="CODEC-40" due-to="Chris Black">
- Patch to add crypto-compatible BigInteger encoding support to Base64.
- </action>
- <action dev="ggregory" type="add" issue="CODEC-39" due-to="Jamie
Flournoy" due-to-email="[email protected]">
- DigestUtils: Add methods for SHA-256, SHA-384, and SHA-512.
- </action>
- <action dev="tobrien" type="fix" issue="CODEC-10" due-to="Reggie Riser"
due-to-email="[email protected]">
- Using US_ENGLISH in Soundex caused an NullPointerException.
- </action>
- <action dev="tobrien" type="fix" issue="CODEC-6" due-to="David Tonhofer">
- Source tarball spews files all over the place.
- </action>
- <action dev="ggregory" type="fix" issue="CODEC-22" due-to="Piero
Ottuzzi">
- Base64.isArrayByteBase64() throws an ArrayIndexOutOfBoundsException
for negative octets
- </action>
- <action dev="jochen" type="add" issue="CODEC-69" due-to="Julius Davies">
- Streaming Base64 (Base64InputStream and Base64OutputStream added).
- </action>
- <action dev="ggregory" type="fix" issue="CODEC-78" due-to="Julius
Davies">
- Base64: Improve Code Coverage.
- </action>
- <action dev="ggregory" type="add" issue="CODEC-59" due-to="Julius
Davies">
- Add methods to Base64 which work with String instead of byte[].
- </action>
- <action dev="ggregory" type="fix" issue="CODEC-81" due-to="Julius
Davies">
- Base64's new constructor parameters ignored.
- </action>
- <action dev="niallp" type="fix" issue="CODEC-83">
- Improve Double Metaphone test coverage.
- </action>
- <action dev="niallp" type="fix" issue="CODEC-84">
- Double Metaphone bugs in alternative encoding.
- </action>
- <action dev="ggregory" type="fix" issue="CODEC-73" due-to="Benjamin
Bentmann">
- Make string2byte conversions independent of platform default encoding.
- </action>
- </release>
- <release version="1.3" date="10 July 2004" description="Feature and fix
release.">
- <action dev="ggregory, tobrien" type="add" issue="CODEC-21" due-to="Alex
Karasulu">
- BinaryCodec: Encodes and decodes binary to and from Strings of 0s and
1s.
- </action>
- <action dev="ggregory" type="add" issue="CODEC-41" due-to="Oleg
Kalnichevski" due-to-email="[email protected]">
- QuotedPrintableCodec: Codec for RFC 1521 MIME (Multipurpose Internet
- Mail Extensions) Part One. Rules #3, #4, and #5 of the
quoted-printable spec
- are not implemented yet. See also issue CODEC-46.
- </action>
- <action dev="ggregory" type="add" issue="CODEC-41" due-to="Oleg
Kalnichevski" due-to-email="[email protected]">
- BCodec: Identical to the Base64 encoding defined by RFC 1521 and
allows a
- character set to be specified.
- </action>
- <action dev="ggregory" type="add" issue="CODEC-41" due-to="Oleg
Kalnichevski" due-to-email="[email protected]">
- QCodec: Similar to the Quoted-Printable content-transfer-encoding
defined
- in RFC 1521 and designed to allow text containing mostly ASCII
characters to
- be decipherable on an ASCII terminal without decoding.
- </action>
- <action dev="ggregory" type="add" issue="CODEC-45" due-to="Matthew
Inger" due-to-email="[email protected]">
- Soundex: Implemented the DIFFERENCE algorithm.
- </action>
- <action dev="ggregory" type="add" issue="CODEC-45" due-to="Matthew
Inger" due-to-email="[email protected]">
- RefinedSoundex: Implemented the DIFFERENCE algorithm.
- </action>
- <action dev="ggregory" type="update">
- This version is released under the
- <a href="http://www.apache.org/licenses/LICENSE-2.0">Apache License
2.0</a>
- , please see LICENSE.txt. Previous versions were released under the
- <a href="http://www.apache.org/licenses/LICENSE-1.1">Apache License
1.1</a>
- </action>
- <action dev="ggregory" type="update">
- The Board recommendation to remove Javadoc author tags has been
- implemented. All author tags are now "Apache Software Foundation".
- </action>
- <action dev="ggregory" type="fix" issue="CODEC-25" due-to="Oleg
Kalnichevski" due-to-email="[email protected]">
- The default URL encoding logic was broken.
- </action>
- <action dev="ggregory" type="fix" issue="CODEC-31" due-to="Gary D.
Gregory">
- Base64 chunked encoding not compliant with RFC 2045 section 2.1 CRLF.
- </action>
- <action dev="ggregory" type="fix" issue="CODEC-5">
- Hex converts illegal characters to 255.
- </action>
- <action dev="tobrien" type="fix" issue="CODEC-17">
- Metaphone now correctly handles a silent B in a word that ends in MB.
- "COMB" is encoded as "KM", before this fix "COMB" was encoded as "KMB".
- </action>
- <action dev="ggregory" type="fix">
- Added missing tags in Javadoc comments.
- </action>
- <action dev="ggregory" type="fix">
- General Javadoc improvements.
- </action>
- </release>
- <release version="1.2" date="24 Nov 2003" description="Feature and fix
release.">
- <action dev="tobrien" type="add" due-to="Oleg Kalnichevski"
due-to-email="[email protected]">
- URLCodec: Implements the www-form-urlencoded encoding scheme.
- </action>
- <action dev="tobrien" type="add" due-to="Dave Dribin, David Graham">
- DigestUtils: Calculates MD5 and SHA digests.
- </action>
- <action dev="tobrien" type="fix" issue="CODEC-26" due-to="Brian Ewins">
- Modified Base64 to remedy non-compliance with RFC
- 2045. Non-Base64 characters were not being discarded during the
- decode. RFC 2045 explicitly states that all characters outside of the
- base64 alphabet are to be ignored.
- </action>
- <action dev="ggregory" type="fix" issue="CODEC-4">
- Hex.decode(Object) throws a ClassCastException when a String argument
is passed in.
- </action>
- <action dev="ggregory" type="fix" issue="CODEC-3">
- Soundex: The HW rule is not applied; hyphens and apostrophes are not
ignored.
- </action>
- <action dev="ggregory" type="fix" issue="CODEC-29">
- Soundex.setMaxLength causes bugs and is not needed.
- Calling Soundex.setMaxLength() with a value of 2 or less causes the
wrong
- answer to be returned. Since the encoding returned by Soundex is always
- of length 4 by definition (we do not use the '-' in as a letter-nnn
- separator) the need for a maxLength attribute is not needed. Deprecate
- the field and accessor methods.
- </action>
- <action dev="Members" type="fix">
- Fix in Metaphone relating to the handling of the maximum code length.
- </action>
- </release>
- <release version="1.1" date="29 April 2003"
- description="The first official release. Numerous projects had been
depending on version 1.0-dev while in the Sandbox.">
- <action dev="Members" type="add">
- A newer version of the Base64 class reflecting improvements from
- both the commons-httpclient and xml-rpc versions of code forked
- from catalina.
- </action>
- <action dev="Members" type="add">
- Base64 class from commons-httpclient in org.apache.commons.codec.base64
- has been retained for backwards compatibility but has been deprecated.
- </action>
- <action dev="Members" type="add">
- Soundex class from commons-util in org.apache.commons.codec.
- </action>
- <action dev="Members" type="add">
- Metaphone class from commons-util in org.apache.commons.codec.
- </action>
- <action dev="tobrien" type="add">
- RefinedSoundex class in org.apache.commons.codec.
- </action>
- <action dev="Members" type="add">
- Encoder/Decoder interfaces in org.apache.commons.
- </action>
- <action dev="Members" type="add">
- String and Binary specific Encoder/Decoder interfaces in
org.apache.commons.
- </action>
- <action dev="Members" type="add">
- StringEncoderComparator replaces the SoundexComparator from the
language package.
- </action>
- <action dev="Members" type="fix">
- Base64 now discards whitespace characters when decoding encoded
content.
- </action>
- </release>
- <release version="1.0-dev" date="25 April 2003" description="Last release
from the Sandbox.">
- <action dev="tobrien" type="add">
- Base64 class from commons-httpclient in
org.apache.commons.codec.base64.
- </action>
- <action dev="tobrien" type="add">
- Soundex class from commons-util in org.apache.commons.codec.
- </action>
- <action dev="tobrien" type="add">
- Metaphone class from commons-util in org.apache.commons.codec.
- </action>
- <action dev="Members" type="add">
- SoundexComparator class from commons-util in org.apache.commons.codec.
- </action>
- </release>
- </body>
-</document>
+<?xml version="1.0"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<!-- $Id$ -->
+
+<!--
+This file is also used by the maven-changes-plugin to generate the release
notes.
+Useful ways of finding items to add to this file are:
+
+1. Add items when you fix a bug or add a feature (this makes the
+release process easy :-).
+
+2. Do a JIRA search for tickets closed since the previous release.
+
+3. Use the report generated by the maven-changelog-plugin to see all
+SVN commits. TBA how to use this with SVN.
+
+To generate the release notes from this file:
+
+mvn changes:announcement-generate -Prelease-notes [-Dchanges.version=m.n]
+
+The <action> type attribute can be add,update,fix,remove.
+-->
+
+<document>
+ <properties>
+ <title>Changes</title>
+ <author>Apache Commons Developers</author>
+ </properties>
+ <body>
+
+ <release version="1.13" date="YYYY-MM-DD" description="TBD">
+ <action issue="CODEC-257" dev="ggregory" type="update">Update from Java
7 to Java 8</action>
+ </release>
+
+ <release version="1.12" date="2019-02-04" description="Feature and fix
release.">
+ <!-- The first attribute below should be the issue id; makes it easier
to navigate in the IDE outline -->
+ <action issue="CODEC-252" dev="chtompki" type="fix">B64 salt generator:
Random -> ThreadLocalRandom</action>
+ <action issue="CODEC-250" dev="sebb" type="fix" due-to="Alex
Volodko">Wrong value calculated by Cologne Phonetic if a special character is
placed between equal letters</action>
+ <action issue="CODEC-244" dev="ggregory" type="update">Update from Java
6 to Java 7</action>
+ <action issue="CODEC-240" dev="ggregory" type="add" due-to="Ioannis
Sermetziadis">Add Percent-Encoding Codec (described in RFC3986 and
RFC7578)</action>
+ <action issue="CODEC-246" dev="ggregory" type="fix" due-to="Oscar Luis
Vera Pérez">ColognePhoneticTest.testIsEncodeEquals missing assertions</action>
+ <action issue="CODEC-251" dev="ggregory" type="add" due-to="Gary
Gregory">Add SHA-3 methods in DigestUtils</action>
+ </release>
+ <release version="1.11" date="2017-10-20" description="Feature and fix
release.">
+ <!-- The first attribute below should be the issue id; makes it easier
to navigate in the IDE outline -->
+ <action issue="CODEC-241" type="add">Add support for XXHash32</action>
+ <action issue="CODEC-234" dev="ggregory" type="update"
due-to="Christopher Schultz, Sebb">Base32.decode should support lowercase
letters</action>
+ <action issue="CODEC-233" dev="sebb" type="update" due-to="Yossi
Tamari">Soundex should support more algorithm variants</action>
+ <action issue="CODEC-145" dev="sebb" type="fix" due-to="Jesse
Glick">Base64.encodeBase64String could better use newStringUsAscii (ditto
encodeBase64URLSafeString)</action>
+ <action issue="CODEC-144" dev="sebb" type="fix">BaseNCodec:
encodeToString and encodeAsString methods are identical</action>
+ <action issue="CODEC-232" dev="sebb" type="fix">URLCodec is neither
immutable nor threadsafe</action>
+ <action issue="CODEC-231" dev="sebb"
type="fix">StringUtils.equals(CharSequence cs1, CharSequence cs2) can fail with
String Index OBE</action>
+ <action issue="CODEC-230" dev="sebb" type="fix">URLCodec.WWW_FORM_URL
should be private</action>
+ <action issue="CODEC-229" dev="sebb"
type="fix">StringUtils.newStringxxx(null) should return null, not NPE</action>
+ <action issue="CODEC-220" dev="sebb" type="add">Fluent interface for
DigestUtils</action>
+ <action issue="CODEC-222" dev="sebb" type="add">Fluent interface for
HmacUtils</action>
+ <action issue="CODEC-225" dev="jochen" type="fix" due-to="Svetlin
Zarev">Fix minor resource leaks</action>
+ <action issue="CODEC-223" dev="sebb" type="remove">Drop obsolete Ant
build</action>
+ <action issue="CODEC-171" dev="sebb" type="add" due-to="Brett Okken">Add
support for CRC32-C</action>
+ <action issue="CODEC-221" dev="sebb" type="update">HmacUtils.updateHmac
calls reset() unnecessarily</action>
+ <action issue="CODEC-200" dev="sebb" type="fix" due-to="Luciano
Vernaschi">Base32.HEX_DECODE_TABLE contains the wrong value 32</action>
+ <action issue="CODEC-207" dev="ggregory" type="fix" due-to="Gary
Gregory">Charsets Javadoc breaks build when using Java 8</action>
+ <action issue="CODEC-199" dev="ggregory/sebb" type="fix" due-to="Yossi
Tamari">Bug in HW rule in Soundex</action>
+ <action issue="CODEC-209" dev="ggregory" type="fix" due-to="Gary
Gregory">Javadoc for SHA-224 DigestUtils methods should mention Java 1.8.0
restriction instead of 1.4.0.</action>
+ <action issue="CODEC-219" dev="ggregory" type="fix" due-to="Gary
Gregory, Sebb">Don't deprecate Charsets Charset constants in favor of Java 7's
java.nio.charset.StandardCharsets</action>
+ <action issue="CODEC-217" dev="ggregory" type="add" due-to="Gary
Gregory">Add HmacAlgorithms.HMAC_SHA_224 (Java 8 only)</action>
+ <action issue="CODEC-213" dev="ggregory" type="add" due-to="Gary
Gregory">Support JEP 287: SHA-3 Hash Algorithms</action>
+ <action issue="CODEC-212" dev="ggregory" type="add" due-to="Gary
Gregory">Create a minimal Digest command line utility:
org.apache.commons.codec.digest.Digest</action>
+ <action issue="CODEC-210" dev="ggregory" type="add" due-to="Gary
Gregory">Add DigestUtils.getDigest(String, MessageDigest)</action>
+ <action issue="CODEC-208" dev="ggregory" type="add" due-to="Gary
Gregory">Make some DigestUtils APIs public</action>
+ <action issue="CODEC-206" dev="ggregory" type="add" due-to="Gary
Gregory">Add java.io.File APIs to MessageDigestAlgorithm</action>
+ <action issue="CODEC-183" dev="ggregory" type="add" due-to="Steven
Wurster">BaseNCodecOutputStream only supports writing EOF on close()</action>
+ <action issue="CODEC-195" dev="ggregory" type="add" due-to="Gary
Gregory">Support SHA-224 in DigestUtils on Java 8</action>
+ <action issue="CODEC-194" dev="ggregory" type="add" due-to="Gary
Gregory">Support java.nio.ByteBuffer in
org.apache.commons.codec.binary.Hex</action>
+ <action issue="CODEC-193" dev="ggregory" type="add" due-to="Michael
Donaghy">Support java.nio.ByteBuffer in DigestUtils</action>
+ <action issue="CODEC-202" dev="ggregory" type="add" due-to="Oleg
Kalnichevski">Add BaseNCodec.encode(byte[], int, int) input with offset and
length parameters for Base64 and Base32.</action>
+ <action issue="CODEC-203" dev="ggregory" type="add" due-to="Gary
Gregory">Add convenience method decodeHex(String).</action>
+ <action issue="CODEC-205" dev="ggregory" type="add" due-to="Gary
Gregory">Add faster CRC32 implementation.</action>
+ <action issue="CODEC-224" dev="ggregory" type="add" due-to="Gary
Gregory">Add convenience API
org.apache.commons.codec.binary.Hex.encodeHexString(byte[]|ByteBuffer,
boolean).</action>
+ <action issue="CODEC-242" dev="ggregory" type="add" due-to="Gary
Gregory">Add Automatic-Module-Name manifest entry for Java 9.</action>
+ </release>
+ <release version="1.10" date="5 November 2014" description="Feature and
fix release.">
+ <action dev="ggregory" type="add" issue="CODEC-192" due-to="Thomas
Neidhart">Add Daitch-Mokotoff Soundex</action>
+ <action dev="ggregory" type="add" issue="CODEC-121" due-to="Thomas
Neidhart, Java John">QuotedPrintableCodec does not support soft line break per
the 'quoted-printable' example on Wikipedia</action>
+ <action dev="tn" type="fix" issue="CODEC-185" due-to="Sean Busbey">Added
clarification to Javadoc of Base64 concerning the use of the urlSafe
parameter</action>
+ <action dev="tn" type="fix" issue="CODEC-191" due-to="Igor Savin">Added
clarification to the Javadoc of Base[32|64]OutputStream that it is mandatory to
call close()</action>
+ <action dev="ggregory" type="fix" issue="CODEC-188" due-to="Hendrik
Saly">Add support for HMAC Message Authentication Code (MAC) digests</action>
+ <action dev="ggregory" type="fix" issue="CODEC-187" due-to="Michael
Tobias, Thomas Neidhart">Beider Morse Phonetic Matching producing incorrect
tokens</action>
+ <action dev="ggregory" type="fix" issue="CODEC-184" due-to="Cyrille
Artho">NullPointerException in DoubleMetaPhone.isDoubleMetaphoneEqual when
using empty strings</action>
+ <action dev="ggregory" type="add" issue="CODEC-181" due-to="Ivan
Martinez-Ortiz">Make possible to provide padding byte to BaseNCodec in
constructor</action>
+ <action dev="ggregory" type="fix" issue="CODEC-180" due-to="Ville
Skyttä">Fix Javadoc 1.8.0 errors</action>
+ <action dev="ggregory" type="update" issue="CODEC-178">Deprecate
Charsets Charset constants in favor of Java 7's
java.nio.charset.StandardCharsets</action>
+ <action dev="ggregory" type="fix" issue="CODEC-189">Fix Java 8 build
Javadoc errors</action>
+ <action dev="ggregory" type="update" issue="CODEC-190">Update from
commons-parent 34 to 35</action>
+ </release>
+ <release version="1.9" date="20 December 2013" description="Feature and
fix release.">
+ <action dev="ggregory" type="update" issue="CODEC-174" due-to="Thomas
Champagne">Improve performance of Beider Morse encoder</action>
+ <action dev="ggregory" type="fix" issue="CODEC-175">Beider Morse does
not close Scanners used to read config files</action>
+ <action dev="sebb" type="fix" issue="CODEC-172" due-to="Matt
Bishop">Base32 decode table has spurious value</action>
+ <action dev="ggregory" type="fix" issue="CODEC-170" due-to="Ron Wheeler,
Henri Yandell">Link broken in Metaphone Javadoc</action>
+ <action dev="ggregory" type="fix" issue="CODEC-176" due-to="Ville
Skyttä">Spelling fixes in Javadoc and comments</action>
+ </release>
+ <release version="1.8" date="19 April 2013" description="Feature and fix
release. Requires a minimum of Java 1.6.">
+ <action dev="ggregory" type="add" issue="CODEC-168" due-to="Daniel
Cassidy">Add DigestUtils.updateDigest(MessageDigest, InputStream).</action>
+ <action dev="julius" type="add" issue="CODEC-167">Add JUnit to test our
decode with pad character in the middle.</action>
+ <action dev="ggregory" type="add" issue="CODEC-161" due-to="crice">Add
Match Rating Approach (MRA) phonetic algorithm encoder.</action>
+ <action dev="ggregory" type="fix" issue="CODEC-163"
due-to="leo141">ColognePhonetic encoder unnecessarily creates many char arrays
on every loop run.</action>
+ <action dev="sebb" type="fix"
issue="CODEC-160">Base64.encodeBase64URLSafeString doesn't add padding
characters at the end.</action>
+ </release>
+ <release version="1.7" date="11 September 2012" description="Feature and
fix release. Requires a minimum of Java 1.6.">
+ <action issue="CODEC-157" dev="ggregory" type="add" due-to="ggregory">
+ DigestUtils: Add MD2 APIs.
+ </action>
+ <action issue="CODEC-156" dev="ggregory" type="add" due-to="ggregory">
+ DigestUtils: add APIs named after standard algorithm name SHA-1.
+ </action>
+ <action issue="CODEC-155" dev="ggregory" type="add" due-to="ggregory">
+ DigestUtils.getDigest(String) should throw IllegalArgumentException
instead of RuntimeException.
+ </action>
+ <action issue="CODEC-153" dev="ggregory" type="add" due-to="ggregory">
+ Create a class MessageDigestAlgorithms to define standard algorithm
names.
+ </action>
+ <action issue="CODEC-152" dev="ggregory" type="add" due-to="ggregory">
+ DigestUtils.getDigest(String) loses the original exception.
+ </action>
+ <action issue="CODEC-151" dev="ggregory" type="add" due-to="lathspell">
+ Remove unnecessary attempt to fill up the salt variable in UnixCrypt.
+ </action>
+ <action issue="CODEC-150" dev="ggregory" type="add" due-to="lathspell">
+ Remove unnecessary call to Math.abs().
+ </action>
+ <action issue="CODEC-148" dev="ggregory" type="add" due-to="lathspell">
+ More tests and minor things.
+ </action>
+ <action issue="CODEC-146" dev="tn" type="add" due-to="Julius Davies">
+ Added regression tests for PhoneticEngine based on Solr-3.6.0.
+ </action>
+ <action issue="CODEC-147" dev="tn" type="update">
+ BeiderMorseEncoder/PhoneticEngine: make results deterministic by using
a LinkedHashSet
+ instead of a HashSet.
+ </action>
+ <action issue="CODEC-143" dev="sebb" type="update">
+ StringBuffer could be replaced by StringBuilder for local variables.
+ </action>
+ <action issue="CODEC-139" dev="ggregory" type="add" due-to="dsebastien">
+ DigestUtils: add updateDigest methods and make methods public.
+ </action>
+ <action issue="CODEC-133" dev="ggregory" type="add" due-to="lathspell">
+ Add classes for MD5/SHA1/SHA-512-based Unix crypt(3) hash variants.
+ </action>
+ <action issue="CODEC-96" dev="ggregory" type="fix" due-to="sebb">
+ Base64 encode() method is no longer thread-safe, breaking clients
using it as a shared BinaryEncoder.
+ Note: the fix breaks binary compatibility, however the changes are to
a class (BaseNCodec) which is
+ intended for internal use.
+ </action>
+ <action issue="CODEC-138" dev="sebb" type="fix">
+ Complete FilterInputStream interface for BaseNCodecInputStream.
+ </action>
+ <action issue="CODEC-136" dev="ggregory" type="fix">
+ Use Charset objects when possible, create Charsets for required
character encodings.
+ </action>
+ <action issue="CODEC-132" dev="ggregory" type="fix" due-to="rcmuir">
+ BeiderMorseEncoder OOM issues.
+ </action>
+ <action issue="CODEC-131" dev="tn" type="fix" due-to="smolav">
+ DoubleMetaphone Javadoc contains dead links.
+ </action>
+ <action issue="CODEC-130" dev="ggregory" type="add" due-to="tn">
+ Base64InputStream.skip skips underlying stream, not output.
+ </action>
+ <action issue="CODEC-63" dev="ggregory" type="add" due-to="tn">
+ Implement NYSIIS phonetic encoder.
+ </action>
+ </release>
+ <release version="1.6" date="20 November 2011" description="Feature and
fix release. Requires a minimum of Java 1.5.">
+ <action dev="ggregory" type="fix" issue="CODEC-129" due-to="ggregory">
+ Use standard Maven directory layout.
+ </action>
+ <action dev="ggregory" type="fix" issue="CODEC-128"
due-to="[email protected]">
+ Documentation spelling fixes.
+ </action>
+ <action dev="ggregory, sebb" type="fix" issue="CODEC-127">
+ Fix various character encoding issues in comments and test cases.
+ </action>
+ <action dev="ggregory, matthewpocock" type="update" issue="CODEC-125"
due-to="Matthew Pocock">
+ Implement a Beider-Morse phonetic matching codec.
+ </action>
+ <action dev="ggregory" type="update" issue="CODEC-119">
+ Migrate to Java 5.
+ </action>
+ <action dev="ggregory" type="update" issue="CODEC-120">
+ Migrate to JUnit 4.
+ </action>
+ <action dev="ggregory" type="fix" issue="CODEC-123">
+ ColognePhonetic Javadoc should use HTML entities for special
characters.
+ </action>
+ </release>
+ <release version="1.5" date="29 March 2011" description="Feature and fix
release. Requires a minimum of Java 1.4.">
+ <action dev="sebb" type="add" issue="CODEC-88">
+ Added new Base32 encoder.
+ </action>
+ <action dev="sebb" type="fix" issue="CODEC-89">
+ new Base64().encode() appends a CRLF, and chunks results into 76
character lines.
+ </action>
+ <action dev="sebb" type="fix" issue="CODEC-92">
+ Many test cases use getBytes() which uses the default platform
encoding so tests may fail on some platforms.
+ </action>
+ <action dev="sebb, julius, ggregory" type="add" issue="CODEC-93"
due-to="sebb">
+ Add test(s) to check that encodeBase64() does not chunk output.
+ </action>
+ <action dev="sebb" type="fix" issue="CODEC-97" due-to="mjryall">
+ Base64 default constructor behaviour changed to enable chunking in 1.4.
+ </action>
+ <action dev="ggregory" type="fix" issue="CODEC-99" due-to="julius">
+ Base64.encodeBase64String() shouldn't chunk.
+ </action>
+ <action dev="julius" type="fix" issue="CODEC-101" due-to="balusc">
+ Base64InputStream#read(byte[]) incorrectly returns 0 at end of any
stream which is multiple of 3 bytes long.
+ </action>
+ <action dev="bayard" type="fix" issue="CODEC-103" due-to="gnuf">
+ Typo in DecoderException message thrown from Hex.decodeHex.
+ </action>
+ <action dev="julius, ggregory" type="add" issue="CODEC-105" due-to="zak">
+ ArrayIndexOutOfBoundsException when doing multiple reads() on encoding
Base64InputStream.
+ </action>
+ <action dev="bayard" type="add" issue="CODEC-106" due-to="it2mmeyerfa">
+ Add the "Kölner Phonetik" encoder (Cologne Phonetic) to codec.lang.
+ </action>
+ <action dev="ggregory" type="add" issue="CODEC-111" due-to="ggregory">
+ org.apache.commons.codec.net.URLCodec.ESCAPE_CHAR isn't final but
should be.
+ </action>
+ <action dev="sebb" type="add" issue="CODEC-112" due-to="sebb">
+ Base64.encodeBase64(byte[] binaryData, boolean isChunked, boolean
urlSafe, int maxResultSize) throws IAE for valid maxResultSize if isChunked is
false.
+ </action>
+ <action dev="ggregory" type="add" issue="CODEC-113" due-to="ggregory">
+ org.apache.commons.codec.language.RefinedSoundex.US_ENGLISH_MAPPING
should be package protected MALICIOUS_CODE.
+ </action>
+ <action dev="ggregory" type="add" issue="CODEC-114" due-to="ggregory">
+ org.apache.commons.codec.language.Soundex.US_ENGLISH_MAPPING should be
package protected MALICIOUS_CODE.
+ </action>
+ <action dev="ggregory" type="add" issue="CODEC-115" due-to="sebb">
+ DoubleMetaphone.maxCodeLen should probably be private.
+ </action>
+ <action dev="ggregory" type="remove" issue="CODEC-116" due-to="ggregory">
+ Remove deprecated package private method
Base64.discardWhitespace(byte[])
+ </action>
+ <action dev="ggregory" type="fix" issue="CODEC-117" due-to="ggregory">
+ Caverphone encodes names starting and ending with "mb" incorrectly.
+ </action>
+ <action dev="ggregory" type="add" issue="CODEC-118" due-to="ggregory">
+ Split Caverphone class into two classes for Caverphone 1.0 and 2.0.
+ </action>
+ </release>
+ <release version="1.4" date="9 August 2009" description="Feature and fix
release. Requires a minimum of Java 1.4.">
+ <action dev="ggregory" type="fix" issue="CODEC-80" due-to="Julius
Davies">
+ Regression: Base64.encode(chunk=true) has bug when input length is
multiple of 76.
+ </action>
+ <action dev="ggregory" type="fix" issue="CODEC-77" due-to="Julius
Davies">
+ Base64 bug with empty input (new byte[0]).
+ </action>
+ <action dev="ggregory" type="fix" issue="CODEC-75" due-to="Julius
Davies">
+ Make Base64 URL-safe.
+ </action>
+ <action dev="ggregory" type="add" issue="CODEC-74">
+ Allow for uppercase letters output in Hex.encodeHex().
+ </action>
+ <action dev="bayard" type="fix" issue="CODEC-72" due-to="Sebb">
+ Soundex and RefinedSoundex issues with character arrays.
+ </action>
+ <action dev="bayard" type="fix" issue="CODEC-71" due-to="Sebb">
+ Base64.isArrayByteBase64() method is inefficient for large byte arrays.
+ </action>
+ <action dev="bayard" type="fix" issue="CODEC-70" due-to="Sebb">
+ Thread safety and malicious code safety improvements.
+ </action>
+ <action dev="bayard" type="fix" issue="CODEC-68" due-to="Robert
Rodewald">
+ isBase64 throws ArrayIndexOutOfBoundsException on some non-BASE64
bytes.
+ </action>
+ <action dev="bayard" type="fix" issue="CODEC-65" due-to="Benjamin
Bentmann">
+ Fix case-insensitive string handling.
+ </action>
+ <action dev="ggregory" type="fix" issue="CODEC-61" due-to="Igor
Slepchin">
+ Base64.encodeBase64() throws NegativeArraySizeException on large files.
+ </action>
+ <action dev="bayard" type="add" issue="CODEC-60">
+ Implement Caverphone.
+ </action>
+ <action dev="ggregory" type="fix" issue="CODEC-58" due-to="Julius
Davies">
+ Character set used by Base64 not documented.
+ </action>
+ <action dev="bayard" type="fix" issue="CODEC-56" due-to="Sebb">
+ RefinedSoundex creates instance before all fields have been initialized.
+ </action>
+ <action dev="bayard" type="add" issue="CODEC-52" due-to="Niklas
Gustavsson">
+ Digest on InputStreams.
+ </action>
+ <action dev="bayard" type="fix" issue="CODEC-51">
+ 2 Test failures in SoundexTest.
+ </action>
+ <action dev="bayard" type="add" issue="CODEC-40" due-to="Chris Black">
+ Patch to add crypto-compatible BigInteger encoding support to Base64.
+ </action>
+ <action dev="ggregory" type="add" issue="CODEC-39" due-to="Jamie
Flournoy" due-to-email="[email protected]">
+ DigestUtils: Add methods for SHA-256, SHA-384, and SHA-512.
+ </action>
+ <action dev="tobrien" type="fix" issue="CODEC-10" due-to="Reggie Riser"
due-to-email="[email protected]">
+ Using US_ENGLISH in Soundex caused a NullPointerException.
+ </action>
+ <action dev="tobrien" type="fix" issue="CODEC-6" due-to="David Tonhofer">
+ Source tarball spews files all over the place.
+ </action>
+ <action dev="ggregory" type="fix" issue="CODEC-22" due-to="Piero
Ottuzzi">
+ Base64.isArrayByteBase64() throws an ArrayIndexOutOfBoundsException
for negative octets
+ </action>
+ <action dev="jochen" type="add" issue="CODEC-69" due-to="Julius Davies">
+ Streaming Base64 (Base64InputStream and Base64OutputStream added).
+ </action>
+ <action dev="ggregory" type="fix" issue="CODEC-78" due-to="Julius
Davies">
+ Base64: Improve Code Coverage.
+ </action>
+ <action dev="ggregory" type="add" issue="CODEC-59" due-to="Julius
Davies">
+ Add methods to Base64 which work with String instead of byte[].
+ </action>
+ <action dev="ggregory" type="fix" issue="CODEC-81" due-to="Julius
Davies">
+ Base64's new constructor parameters ignored.
+ </action>
+ <action dev="niallp" type="fix" issue="CODEC-83">
+ Improve Double Metaphone test coverage.
+ </action>
+ <action dev="niallp" type="fix" issue="CODEC-84">
+ Double Metaphone bugs in alternative encoding.
+ </action>
+ <action dev="ggregory" type="fix" issue="CODEC-73" due-to="Benjamin
Bentmann">
+ Make string2byte conversions independent of platform default encoding.
+ </action>
+ </release>
+ <release version="1.3" date="10 July 2004" description="Feature and fix
release.">
+ <action dev="ggregory, tobrien" type="add" issue="CODEC-21" due-to="Alex
Karasulu">
+ BinaryCodec: Encodes and decodes binary to and from Strings of 0s and
1s.
+ </action>
+ <action dev="ggregory" type="add" issue="CODEC-41" due-to="Oleg
Kalnichevski" due-to-email="[email protected]">
+ QuotedPrintableCodec: Codec for RFC 1521 MIME (Multipurpose Internet
+ Mail Extensions) Part One. Rules #3, #4, and #5 of the
quoted-printable spec
+ are not implemented yet. See also issue CODEC-46.
+ </action>
+ <action dev="ggregory" type="add" issue="CODEC-41" due-to="Oleg
Kalnichevski" due-to-email="[email protected]">
+ BCodec: Identical to the Base64 encoding defined by RFC 1521 and
allows a
+ character set to be specified.
+ </action>
+ <action dev="ggregory" type="add" issue="CODEC-41" due-to="Oleg
Kalnichevski" due-to-email="[email protected]">
+ QCodec: Similar to the Quoted-Printable content-transfer-encoding
defined
+ in RFC 1521 and designed to allow text containing mostly ASCII
characters to
+ be decipherable on an ASCII terminal without decoding.
+ </action>
+ <action dev="ggregory" type="add" issue="CODEC-45" due-to="Matthew
Inger" due-to-email="[email protected]">
+ Soundex: Implemented the DIFFERENCE algorithm.
+ </action>
+ <action dev="ggregory" type="add" issue="CODEC-45" due-to="Matthew
Inger" due-to-email="[email protected]">
+ RefinedSoundex: Implemented the DIFFERENCE algorithm.
+ </action>
+ <action dev="ggregory" type="update">
+ This version is released under the
+ <a href="http://www.apache.org/licenses/LICENSE-2.0">Apache License
2.0</a>
+ , please see LICENSE.txt. Previous versions were released under the
+ <a href="http://www.apache.org/licenses/LICENSE-1.1">Apache License
1.1</a>
+ </action>
+ <action dev="ggregory" type="update">
+ The Board recommendation to remove Javadoc author tags has been
+ implemented. All author tags are now "Apache Software Foundation".
+ </action>
+ <action dev="ggregory" type="fix" issue="CODEC-25" due-to="Oleg
Kalnichevski" due-to-email="[email protected]">
+ The default URL encoding logic was broken.
+ </action>
+ <action dev="ggregory" type="fix" issue="CODEC-31" due-to="Gary D.
Gregory">
+ Base64 chunked encoding not compliant with RFC 2045 section 2.1 CRLF.
+ </action>
+ <action dev="ggregory" type="fix" issue="CODEC-5">
+ Hex converts illegal characters to 255.
+ </action>
+ <action dev="tobrien" type="fix" issue="CODEC-17">
+ Metaphone now correctly handles a silent B in a word that ends in MB.
+ "COMB" is encoded as "KM", before this fix "COMB" was encoded as "KMB".
+ </action>
+ <action dev="ggregory" type="fix">
+ Added missing tags in Javadoc comments.
+ </action>
+ <action dev="ggregory" type="fix">
+ General Javadoc improvements.
+ </action>
+ </release>
+ <release version="1.2" date="24 Nov 2003" description="Feature and fix
release.">
+ <action dev="tobrien" type="add" due-to="Oleg Kalnichevski"
due-to-email="[email protected]">
+ URLCodec: Implements the www-form-urlencoded encoding scheme.
+ </action>
+ <action dev="tobrien" type="add" due-to="Dave Dribin, David Graham">
+ DigestUtils: Calculates MD5 and SHA digests.
+ </action>
+ <action dev="tobrien" type="fix" issue="CODEC-26" due-to="Brian Ewins">
+ Modified Base64 to remedy non-compliance with RFC
+ 2045. Non-Base64 characters were not being discarded during the
+ decode. RFC 2045 explicitly states that all characters outside of the
+ base64 alphabet are to be ignored.
+ </action>
+ <action dev="ggregory" type="fix" issue="CODEC-4">
+ Hex.decode(Object) throws a ClassCastException when a String argument
is passed in.
+ </action>
+ <action dev="ggregory" type="fix" issue="CODEC-3">
+ Soundex: The HW rule is not applied; hyphens and apostrophes are not
ignored.
+ </action>
+ <action dev="ggregory" type="fix" issue="CODEC-29">
+ Soundex.setMaxLength causes bugs and is not needed.
+ Calling Soundex.setMaxLength() with a value of 2 or less causes the
wrong
+ answer to be returned. Since the encoding returned by Soundex is always
+ of length 4 by definition (we do not use the '-' as a letter-nnn
+ separator), a maxLength attribute is not needed. Deprecate
+ the field and accessor methods.
+ </action>
+ <action dev="Members" type="fix">
+ Fix in Metaphone relating to the handling of the maximum code length.
+ </action>
+ </release>
+ <release version="1.1" date="29 April 2003"
+ description="The first official release. Numerous projects had been
depending on version 1.0-dev while in the Sandbox.">
+ <action dev="Members" type="add">
+ A newer version of the Base64 class reflecting improvements from
+ both the commons-httpclient and xml-rpc versions of code forked
+ from catalina.
+ </action>
+ <action dev="Members" type="add">
+ Base64 class from commons-httpclient in org.apache.commons.codec.base64
+ has been retained for backwards compatibility but has been deprecated.
+ </action>
+ <action dev="Members" type="add">
+ Soundex class from commons-util in org.apache.commons.codec.
+ </action>
+ <action dev="Members" type="add">
+ Metaphone class from commons-util in org.apache.commons.codec.
+ </action>
+ <action dev="tobrien" type="add">
+ RefinedSoundex class in org.apache.commons.codec.
+ </action>
+ <action dev="Members" type="add">
+ Encoder/Decoder interfaces in org.apache.commons.
+ </action>
+ <action dev="Members" type="add">
+ String and Binary specific Encoder/Decoder interfaces in
org.apache.commons.
+ </action>
+ <action dev="Members" type="add">
+ StringEncoderComparator replaces the SoundexComparator from the
language package.
+ </action>
+ <action dev="Members" type="fix">
+ Base64 now discards whitespace characters when decoding encoded
content.
+ </action>
+ </release>
+ <release version="1.0-dev" date="25 April 2003" description="Last release
from the Sandbox.">
+ <action dev="tobrien" type="add">
+ Base64 class from commons-httpclient in
org.apache.commons.codec.base64.
+ </action>
+ <action dev="tobrien" type="add">
+ Soundex class from commons-util in org.apache.commons.codec.
+ </action>
+ <action dev="tobrien" type="add">
+ Metaphone class from commons-util in org.apache.commons.codec.
+ </action>
+ <action dev="Members" type="add">
+ SoundexComparator class from commons-util in org.apache.commons.codec.
+ </action>
+ </release>
+ </body>
+</document>
diff --git
a/src/main/java/org/apache/commons/codec/language/DaitchMokotoffSoundex.java
b/src/main/java/org/apache/commons/codec/language/DaitchMokotoffSoundex.java
index 692e0df..96ea4ed 100644
--- a/src/main/java/org/apache/commons/codec/language/DaitchMokotoffSoundex.java
+++ b/src/main/java/org/apache/commons/codec/language/DaitchMokotoffSoundex.java
@@ -1,553 +1,553 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.commons.codec.language;
-
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.Comparator;
-import java.util.HashMap;
-import java.util.LinkedHashSet;
-import java.util.List;
-import java.util.Map;
-import java.util.Scanner;
-import java.util.Set;
-
-import org.apache.commons.codec.CharEncoding;
-import org.apache.commons.codec.EncoderException;
-import org.apache.commons.codec.Resources;
-import org.apache.commons.codec.StringEncoder;
-
-/**
- * Encodes a string into a Daitch-Mokotoff Soundex value.
- * <p>
- * The Daitch-Mokotoff Soundex algorithm is a refinement of the Russel and
American Soundex algorithms, yielding greater
- * accuracy in matching especially Slavish and Yiddish surnames with similar
pronunciation but differences in spelling.
- * </p>
- * <p>
- * The main differences compared to the other soundex variants are:
- * </p>
- * <ul>
- * <li>coded names are 6 digits long
- * <li>the initial character of the name is coded
- * <li>rules to encoded multi-character n-grams
- * <li>multiple possible encodings for the same name (branching)
- * </ul>
- * <p>
- * This implementation supports branching, depending on the used method:
- * <ul>
- * <li>{@link #encode(String)} - branching disabled, only the first code will
be returned
- * <li>{@link #soundex(String)} - branching enabled, all codes will be
returned, separated by '|'
- * </ul>
- * <p>
- * Note: this implementation has additional branching rules compared to the
original description of the algorithm. The
- * rules can be customized by overriding the default rules contained in the
resource file
- * {@code org/apache/commons/codec/language/dmrules.txt}.
- * </p>
- * <p>
- * This class is thread-safe.
- * </p>
- *
- * @see Soundex
- * @see <a
href="http://en.wikipedia.org/wiki/Daitch%E2%80%93Mokotoff_Soundex"> Wikipedia
- Daitch-Mokotoff Soundex</a>
- * @see <a href="http://www.avotaynu.com/soundex.htm">Avotaynu - Soundexing
and Genealogy</a>
- *
- * @version $Id$
- * @since 1.10
- */
-public class DaitchMokotoffSoundex implements StringEncoder {
-
- /**
- * Inner class representing a branch during DM soundex encoding.
- */
- private static final class Branch {
- private final StringBuilder builder;
- private String cachedString;
- private String lastReplacement;
-
- private Branch() {
- builder = new StringBuilder();
- lastReplacement = null;
- cachedString = null;
- }
-
- /**
- * Creates a new branch, identical to this branch.
- *
- * @return a new, identical branch
- */
- public Branch createBranch() {
- final Branch branch = new Branch();
- branch.builder.append(toString());
- branch.lastReplacement = this.lastReplacement;
- return branch;
- }
-
- @Override
- public boolean equals(final Object other) {
- if (this == other) {
- return true;
- }
- if (!(other instanceof Branch)) {
- return false;
- }
-
- return toString().equals(((Branch) other).toString());
- }
-
- /**
- * Finish this branch by appending '0's until the maximum code length
has been reached.
- */
- public void finish() {
- while (builder.length() < MAX_LENGTH) {
- builder.append('0');
- cachedString = null;
- }
- }
-
- @Override
- public int hashCode() {
- return toString().hashCode();
- }
-
- /**
- * Process the next replacement to be added to this branch.
- *
- * @param replacement
- * the next replacement to append
- * @param forceAppend
- * indicates if the default processing shall be overridden
- */
- public void processNextReplacement(final String replacement, final
boolean forceAppend) {
- final boolean append = lastReplacement == null ||
!lastReplacement.endsWith(replacement) || forceAppend;
-
- if (append && builder.length() < MAX_LENGTH) {
- builder.append(replacement);
- // remove all characters after the maximum length
- if (builder.length() > MAX_LENGTH) {
- builder.delete(MAX_LENGTH, builder.length());
- }
- cachedString = null;
- }
-
- lastReplacement = replacement;
- }
-
- @Override
- public String toString() {
- if (cachedString == null) {
- cachedString = builder.toString();
- }
- return cachedString;
- }
- }
-
- /**
- * Inner class for storing rules.
- */
- private static final class Rule {
- private final String pattern;
- private final String[] replacementAtStart;
- private final String[] replacementBeforeVowel;
- private final String[] replacementDefault;
-
- protected Rule(final String pattern, final String replacementAtStart,
final String replacementBeforeVowel,
- final String replacementDefault) {
- this.pattern = pattern;
- this.replacementAtStart = replacementAtStart.split("\\|");
- this.replacementBeforeVowel = replacementBeforeVowel.split("\\|");
- this.replacementDefault = replacementDefault.split("\\|");
- }
-
- public int getPatternLength() {
- return pattern.length();
- }
-
- public String[] getReplacements(final String context, final boolean
atStart) {
- if (atStart) {
- return replacementAtStart;
- }
-
- final int nextIndex = getPatternLength();
- final boolean nextCharIsVowel = nextIndex < context.length() ?
isVowel(context.charAt(nextIndex)) : false;
- if (nextCharIsVowel) {
- return replacementBeforeVowel;
- }
-
- return replacementDefault;
- }
-
- private boolean isVowel(final char ch) {
- return ch == 'a' || ch == 'e' || ch == 'i' || ch == 'o' || ch ==
'u';
- }
-
- public boolean matches(final String context) {
- return context.startsWith(pattern);
- }
-
- @Override
- public String toString() {
- return String.format("%s=(%s,%s,%s)", pattern,
Arrays.asList(replacementAtStart),
- Arrays.asList(replacementBeforeVowel),
Arrays.asList(replacementDefault));
- }
- }
-
- private static final String COMMENT = "//";
- private static final String DOUBLE_QUOTE = "\"";
-
- private static final String MULTILINE_COMMENT_END = "*/";
-
- private static final String MULTILINE_COMMENT_START = "/*";
-
- /** The resource file containing the replacement and folding rules */
- private static final String RESOURCE_FILE =
"org/apache/commons/codec/language/dmrules.txt";
-
- /** The code length of a DM soundex value. */
- private static final int MAX_LENGTH = 6;
-
- /** Transformation rules indexed by the first character of their pattern.
*/
- private static final Map<Character, List<Rule>> RULES = new HashMap<>();
-
- /** Folding rules. */
- private static final Map<Character, Character> FOLDINGS = new HashMap<>();
-
- static {
- try (final Scanner scanner = new
Scanner(Resources.getInputStream(RESOURCE_FILE), CharEncoding.UTF_8)) {
- parseRules(scanner, RESOURCE_FILE, RULES, FOLDINGS);
- }
-
- // sort RULES by pattern length in descending order
- for (final Map.Entry<Character, List<Rule>> rule : RULES.entrySet()) {
- final List<Rule> ruleList = rule.getValue();
- Collections.sort(ruleList, new Comparator<Rule>() {
- @Override
- public int compare(final Rule rule1, final Rule rule2) {
- return rule2.getPatternLength() - rule1.getPatternLength();
- }
- });
- }
- }
-
- private static void parseRules(final Scanner scanner, final String
location,
- final Map<Character, List<Rule>> ruleMapping, final Map<Character,
Character> asciiFoldings) {
- int currentLine = 0;
- boolean inMultilineComment = false;
-
- while (scanner.hasNextLine()) {
- currentLine++;
- final String rawLine = scanner.nextLine();
- String line = rawLine;
-
- if (inMultilineComment) {
- if (line.endsWith(MULTILINE_COMMENT_END)) {
- inMultilineComment = false;
- }
- continue;
- }
-
- if (line.startsWith(MULTILINE_COMMENT_START)) {
- inMultilineComment = true;
- } else {
- // discard comments
- final int cmtI = line.indexOf(COMMENT);
- if (cmtI >= 0) {
- line = line.substring(0, cmtI);
- }
-
- // trim leading-trailing whitespace
- line = line.trim();
-
- if (line.length() == 0) {
- continue; // empty lines can be safely skipped
- }
-
- if (line.contains("=")) {
- // folding
- final String[] parts = line.split("=");
- if (parts.length != 2) {
- throw new IllegalArgumentException("Malformed folding
statement split into " + parts.length +
- " parts: " + rawLine + " in " + location);
- }
- final String leftCharacter = parts[0];
- final String rightCharacter = parts[1];
-
- if (leftCharacter.length() != 1 || rightCharacter.length()
!= 1) {
- throw new IllegalArgumentException("Malformed folding
statement - " +
- "patterns are not single characters: " +
rawLine + " in " + location);
- }
-
- asciiFoldings.put(leftCharacter.charAt(0),
rightCharacter.charAt(0));
- } else {
- // rule
- final String[] parts = line.split("\\s+");
- if (parts.length != 4) {
- throw new IllegalArgumentException("Malformed rule
statement split into " + parts.length +
- " parts: " + rawLine + " in " + location);
- }
- try {
- final String pattern = stripQuotes(parts[0]);
- final String replacement1 = stripQuotes(parts[1]);
- final String replacement2 = stripQuotes(parts[2]);
- final String replacement3 = stripQuotes(parts[3]);
-
- final Rule r = new Rule(pattern, replacement1,
replacement2, replacement3);
- final char patternKey = r.pattern.charAt(0);
- List<Rule> rules = ruleMapping.get(patternKey);
- if (rules == null) {
- rules = new ArrayList<>();
- ruleMapping.put(patternKey, rules);
- }
- rules.add(r);
- } catch (final IllegalArgumentException e) {
- throw new IllegalStateException(
- "Problem parsing line '" + currentLine + "' in
" + location, e);
- }
- }
- }
- }
- }
-
- private static String stripQuotes(String str) {
- if (str.startsWith(DOUBLE_QUOTE)) {
- str = str.substring(1);
- }
-
- if (str.endsWith(DOUBLE_QUOTE)) {
- str = str.substring(0, str.length() - 1);
- }
-
- return str;
- }
-
- /** Whether to use ASCII folding prior to encoding. */
- private final boolean folding;
-
- /**
- * Creates a new instance with ASCII-folding enabled.
- */
- public DaitchMokotoffSoundex() {
- this(true);
- }
-
- /**
- * Creates a new instance.
- * <p>
- * With ASCII-folding enabled, certain accented characters will be
transformed to equivalent ASCII characters, e.g.
- * è -> e.
- * </p>
- *
- * @param folding
- * if ASCII-folding shall be performed before encoding
- */
- public DaitchMokotoffSoundex(final boolean folding) {
- this.folding = folding;
- }
-
- /**
- * Performs a cleanup of the input string before the actual soundex
transformation.
- * <p>
- * Removes all whitespace characters and performs ASCII folding if enabled.
- * </p>
- *
- * @param input
- * the input string to cleanup
- * @return a cleaned up string
- */
- private String cleanup(final String input) {
- final StringBuilder sb = new StringBuilder();
- for (char ch : input.toCharArray()) {
- if (Character.isWhitespace(ch)) {
- continue;
- }
-
- ch = Character.toLowerCase(ch);
- if (folding && FOLDINGS.containsKey(ch)) {
- ch = FOLDINGS.get(ch);
- }
- sb.append(ch);
- }
- return sb.toString();
- }
-
- /**
- * Encodes an Object using the Daitch-Mokotoff soundex algorithm without
branching.
- * <p>
- * This method is provided in order to satisfy the requirements of the
Encoder interface, and will throw an
- * EncoderException if the supplied object is not of type java.lang.String.
- * </p>
- *
- * @see #soundex(String)
- *
- * @param obj
- * Object to encode
- * @return An object (of type java.lang.String) containing the DM soundex
code, which corresponds to the String
- * supplied.
- * @throws EncoderException
- * if the parameter supplied is not of type java.lang.String
- * @throws IllegalArgumentException
- * if a character is not mapped
- */
- @Override
- public Object encode(final Object obj) throws EncoderException {
- if (!(obj instanceof String)) {
- throw new EncoderException(
- "Parameter supplied to DaitchMokotoffSoundex encode is not
of type java.lang.String");
- }
- return encode((String) obj);
- }
-
- /**
- * Encodes a String using the Daitch-Mokotoff soundex algorithm without
branching.
- *
- * @see #soundex(String)
- *
- * @param source
- * A String object to encode
- * @return A DM Soundex code corresponding to the String supplied
- * @throws IllegalArgumentException
- * if a character is not mapped
- */
- @Override
- public String encode(final String source) {
- if (source == null) {
- return null;
- }
- return soundex(source, false)[0];
- }
-
- /**
- * Encodes a String using the Daitch-Mokotoff soundex algorithm with
branching.
- * <p>
- * In case a string is encoded into multiple codes (see branching rules),
the result will contain all codes,
- * separated by '|'.
- * </p>
- * <p>
- * Example: the name "AUERBACH" is encoded as both
- * </p>
- * <ul>
- * <li>097400</li>
- * <li>097500</li>
- * </ul>
- * <p>
- * Thus the result will be "097400|097500".
- * </p>
- *
- * @param source
- * A String object to encode
- * @return A string containing a set of DM Soundex codes corresponding to
the String supplied
- * @throws IllegalArgumentException
- * if a character is not mapped
- */
- public String soundex(final String source) {
- final String[] branches = soundex(source, true);
- final StringBuilder sb = new StringBuilder();
- int index = 0;
- for (final String branch : branches) {
- sb.append(branch);
- if (++index < branches.length) {
- sb.append('|');
- }
- }
- return sb.toString();
- }
-
- /**
- * Perform the actual DM Soundex algorithm on the input string.
- *
- * @param source
- * A String object to encode
- * @param branching
- * If branching shall be performed
- * @return A string array containing all DM Soundex codes corresponding to
the String supplied depending on the
- * selected branching mode
- */
- private String[] soundex(final String source, final boolean branching) {
- if (source == null) {
- return null;
- }
-
- final String input = cleanup(source);
-
- final Set<Branch> currentBranches = new LinkedHashSet<>();
- currentBranches.add(new Branch());
-
- char lastChar = '\0';
- for (int index = 0; index < input.length(); index++) {
- final char ch = input.charAt(index);
-
- // ignore whitespace inside a name
- if (Character.isWhitespace(ch)) {
- continue;
- }
-
- final String inputContext = input.substring(index);
- final List<Rule> rules = RULES.get(ch);
- if (rules == null) {
- continue;
- }
-
- // use an EMPTY_LIST to avoid false positive warnings wrt
potential null pointer access
- final List<Branch> nextBranches = branching ? new
ArrayList<Branch>() : Collections.<Branch>emptyList();
-
- for (final Rule rule : rules) {
- if (rule.matches(inputContext)) {
- if (branching) {
- nextBranches.clear();
- }
- final String[] replacements =
rule.getReplacements(inputContext, lastChar == '\0');
- final boolean branchingRequired = replacements.length > 1
&& branching;
-
- for (final Branch branch : currentBranches) {
- for (final String nextReplacement : replacements) {
- // if we have multiple replacements, always create
a new branch
- final Branch nextBranch = branchingRequired ?
branch.createBranch() : branch;
-
- // special rule: occurrences of mn or nm are
treated differently
- final boolean force = (lastChar == 'm' && ch ==
'n') || (lastChar == 'n' && ch == 'm');
-
- nextBranch.processNextReplacement(nextReplacement,
force);
-
- if (branching) {
- nextBranches.add(nextBranch);
- } else {
- break;
- }
- }
- }
-
- if (branching) {
- currentBranches.clear();
- currentBranches.addAll(nextBranches);
- }
- index += rule.getPatternLength() - 1;
- break;
- }
- }
-
- lastChar = ch;
- }
-
- final String[] result = new String[currentBranches.size()];
- int index = 0;
- for (final Branch branch : currentBranches) {
- branch.finish();
- result[index++] = branch.toString();
- }
-
- return result;
- }
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.codec.language;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Scanner;
+import java.util.Set;
+
+import org.apache.commons.codec.CharEncoding;
+import org.apache.commons.codec.EncoderException;
+import org.apache.commons.codec.Resources;
+import org.apache.commons.codec.StringEncoder;
+
+/**
+ * Encodes a string into a Daitch-Mokotoff Soundex value.
+ * <p>
+ * The Daitch-Mokotoff Soundex algorithm is a refinement of the Russell and American Soundex algorithms, yielding greater
+ * accuracy in matching, especially for Slavic and Yiddish surnames with similar pronunciation but differences in spelling.
+ * </p>
+ * <p>
+ * The main differences compared to the other soundex variants are:
+ * </p>
+ * <ul>
+ * <li>coded names are 6 digits long
+ * <li>the initial character of the name is coded
+ * <li>rules to encode multi-character n-grams
+ * <li>multiple possible encodings for the same name (branching)
+ * </ul>
+ * <p>
+ * This implementation supports branching, depending on the used method:
+ * <ul>
+ * <li>{@link #encode(String)} - branching disabled, only the first code will
be returned
+ * <li>{@link #soundex(String)} - branching enabled, all codes will be
returned, separated by '|'
+ * </ul>
+ * <p>
+ * Note: this implementation has additional branching rules compared to the
original description of the algorithm. The
+ * rules can be customized by overriding the default rules contained in the
resource file
+ * {@code org/apache/commons/codec/language/dmrules.txt}.
+ * </p>
+ * <p>
+ * This class is thread-safe.
+ * </p>
+ *
+ * @see Soundex
+ * @see <a
href="http://en.wikipedia.org/wiki/Daitch%E2%80%93Mokotoff_Soundex"> Wikipedia
- Daitch-Mokotoff Soundex</a>
+ * @see <a href="http://www.avotaynu.com/soundex.htm">Avotaynu - Soundexing
and Genealogy</a>
+ *
+ * @version $Id$
+ * @since 1.10
+ */
+public class DaitchMokotoffSoundex implements StringEncoder {
+
+    /**
+     * A single candidate code that is built up step by step while a name is encoded.
+     * <p>
+     * Two branches are considered equal when the code text accumulated so far is equal.
+     * </p>
+     */
+    private static final class Branch {
+        /** Code characters accumulated so far; kept at no more than {@code MAX_LENGTH} characters. */
+        private final StringBuilder code = new StringBuilder();
+
+        /** Lazily created snapshot of the code text; reset to {@code null} whenever the code changes. */
+        private String snapshot;
+
+        /** The replacement most recently offered to this branch, whether or not it was appended. */
+        private String previousReplacement;
+
+        private Branch() {
+            // a new branch starts with an empty code and no previous replacement
+        }
+
+        /**
+         * Creates a copy of this branch that can be extended independently.
+         *
+         * @return a new branch with the same code text and the same previous replacement
+         */
+        public Branch createBranch() {
+            final Branch copy = new Branch();
+            copy.code.append(toString());
+            copy.previousReplacement = previousReplacement;
+            return copy;
+        }
+
+        @Override
+        public boolean equals(final Object other) {
+            if (this == other) {
+                return true;
+            }
+            return other instanceof Branch && toString().equals(other.toString());
+        }
+
+        /**
+         * Pads the code with '0' characters until it is {@code MAX_LENGTH} characters long.
+         */
+        public void finish() {
+            final int missing = MAX_LENGTH - code.length();
+            for (int i = 0; i < missing; i++) {
+                code.append('0');
+            }
+            if (missing > 0) {
+                snapshot = null;
+            }
+        }
+
+        @Override
+        public int hashCode() {
+            return toString().hashCode();
+        }
+
+        /**
+         * Offers the next replacement to this branch.
+         * <p>
+         * The replacement is appended unless the previously offered replacement already ends with it; that check can be
+         * bypassed with {@code forceAppend}. Nothing is appended once the code is full, and anything beyond
+         * {@code MAX_LENGTH} characters is cut off. The replacement is remembered as the previous one in every case.
+         * </p>
+         *
+         * @param replacement
+         *            the next replacement to append
+         * @param forceAppend
+         *            if {@code true}, append even when the previous replacement ends with {@code replacement}
+         */
+        public void processNextReplacement(final String replacement, final boolean forceAppend) {
+            final boolean repeatsPrevious = previousReplacement != null && previousReplacement.endsWith(replacement);
+
+            if ((!repeatsPrevious || forceAppend) && code.length() < MAX_LENGTH) {
+                code.append(replacement);
+                // drop everything that overshoots the maximum length
+                code.setLength(Math.min(code.length(), MAX_LENGTH));
+                snapshot = null;
+            }
+
+            previousReplacement = replacement;
+        }
+
+        @Override
+        public String toString() {
+            String text = snapshot;
+            if (text == null) {
+                text = code.toString();
+                snapshot = text;
+            }
+            return text;
+        }
+    }
+
+    /**
+     * A single transformation rule: a literal pattern plus the replacement codes to use for it in three contexts.
+     * <p>
+     * Each replacement is passed in as one string in which alternative codes are separated by '|'.
+     * </p>
+     */
+    private static final class Rule {
+        private final String pattern;
+        private final String[] replacementAtStart;
+        private final String[] replacementBeforeVowel;
+        private final String[] replacementDefault;
+
+        /**
+         * Creates a rule.
+         *
+         * @param pattern
+         *            the literal text this rule matches
+         * @param replacementAtStart
+         *            '|'-separated codes returned when the caller reports the match as being at the start
+         * @param replacementBeforeVowel
+         *            '|'-separated codes returned when the character after the pattern is a vowel
+         * @param replacementDefault
+         *            '|'-separated codes returned in every other case
+         */
+        protected Rule(final String pattern, final String replacementAtStart, final String replacementBeforeVowel,
+                final String replacementDefault) {
+            this.pattern = pattern;
+            this.replacementAtStart = alternatives(replacementAtStart);
+            this.replacementBeforeVowel = alternatives(replacementBeforeVowel);
+            this.replacementDefault = alternatives(replacementDefault);
+        }
+
+        /** Splits a replacement definition into its '|'-separated alternatives. */
+        private static String[] alternatives(final String definition) {
+            return definition.split("\\|");
+        }
+
+        /**
+         * @return the number of characters in this rule's pattern
+         */
+        public int getPatternLength() {
+            return pattern.length();
+        }
+
+        /**
+         * Selects the replacement codes that apply in the given context.
+         *
+         * @param context
+         *            the remaining input, beginning with the text matched by this rule
+         * @param atStart
+         *            whether the at-start replacements shall be used
+         * @return the at-start codes if {@code atStart}; otherwise the before-vowel codes if the character following
+         *         the pattern in {@code context} is a vowel; otherwise the default codes
+         */
+        public String[] getReplacements(final String context, final boolean atStart) {
+            if (atStart) {
+                return replacementAtStart;
+            }
+
+            final int next = pattern.length();
+            final boolean vowelFollows = next < context.length() && isVowel(context.charAt(next));
+            return vowelFollows ? replacementBeforeVowel : replacementDefault;
+        }
+
+        /** Only the lower-case letters a, e, i, o and u count as vowels. */
+        private boolean isVowel(final char ch) {
+            return "aeiou".indexOf(ch) >= 0;
+        }
+
+        /**
+         * @param context
+         *            the remaining input
+         * @return whether {@code context} begins with this rule's pattern
+         */
+        public boolean matches(final String context) {
+            return context.startsWith(pattern);
+        }
+
+        @Override
+        public String toString() {
+            final List<String> atStart = Arrays.asList(replacementAtStart);
+            final List<String> beforeVowel = Arrays.asList(replacementBeforeVowel);
+            final List<String> otherwise = Arrays.asList(replacementDefault);
+            return String.format("%s=(%s,%s,%s)", pattern, atStart, beforeVowel, otherwise);
+        }
+    }
+
+ /** Marker that starts a single-line comment in the rules resource; the rest of the line is discarded. */
+ private static final String COMMENT = "//";
+ /** Quote character that is stripped from the start and the end of each rule token. */
+ private static final String DOUBLE_QUOTE = "\"";
+
+ /** A line ending with this marker terminates a multi-line comment in the rules resource. */
+ private static final String MULTILINE_COMMENT_END = "*/";
+
+ /** A line starting with this marker opens a multi-line comment in the rules resource. */
+ private static final String MULTILINE_COMMENT_START = "/*";
+
+ /** The resource file containing the replacement and folding rules. */
+ private static final String RESOURCE_FILE =
"org/apache/commons/codec/language/dmrules.txt";
+
+ /** The code length of a DM soundex value. */
+ private static final int MAX_LENGTH = 6;
+
+ /** Transformation rules indexed by the first character of their pattern. */
+ private static final Map<Character, List<Rule>> RULES = new HashMap<>();
+
+ /** Folding rules: maps a character to the single character that replaces it during cleanup. */
+ private static final Map<Character, Character> FOLDINGS = new HashMap<>();
+
+    static {
+        // Load the rule and folding definitions once, when the class is initialized.
+        try (final Scanner scanner = new Scanner(Resources.getInputStream(RESOURCE_FILE), CharEncoding.UTF_8)) {
+            parseRules(scanner, RESOURCE_FILE, RULES, FOLDINGS);
+        }
+
+        // Sort every rule list by pattern length in descending order: the encoder applies the first rule that
+        // matches, so the longest pattern has to be tried first. The sort is stable, so rules with patterns of
+        // equal length keep the order in which they were parsed.
+        final Comparator<Rule> longestPatternFirst = Comparator.comparingInt(Rule::getPatternLength).reversed();
+        for (final List<Rule> ruleList : RULES.values()) {
+            ruleList.sort(longestPatternFirst);
+        }
+    }
+
+    /**
+     * Reads rule definitions line by line and stores them in the supplied maps.
+     * <p>
+     * A line starting with the multi-line comment start marker opens a comment that lasts until a later line ends with
+     * the end marker. On other lines everything from the single-line comment marker onwards is dropped and the rest is
+     * trimmed; empty lines are skipped. A line containing '=' is a folding statement of the form {@code x=y} with
+     * single characters on both sides. Any other line is a rule made of four whitespace-separated tokens: the pattern
+     * followed by three replacement definitions, each optionally enclosed in double quotes.
+     * </p>
+     *
+     * @param scanner
+     *            the source of the lines to parse
+     * @param location
+     *            name of the source, used in error messages only
+     * @param ruleMapping
+     *            receives the parsed rules, keyed by the first character of their pattern
+     * @param asciiFoldings
+     *            receives the parsed folding statements
+     * @throws IllegalArgumentException
+     *             if a folding statement or a rule statement is malformed
+     * @throws IllegalStateException
+     *             if building or storing a rule fails with an IllegalArgumentException
+     */
+    private static void parseRules(final Scanner scanner, final String location,
+            final Map<Character, List<Rule>> ruleMapping, final Map<Character, Character> asciiFoldings) {
+        int lineNumber = 0;
+        boolean insideBlockComment = false;
+
+        while (scanner.hasNextLine()) {
+            lineNumber++;
+            final String rawLine = scanner.nextLine();
+
+            if (insideBlockComment) {
+                // the comment stays open until a line ends with the end marker
+                insideBlockComment = !rawLine.endsWith(MULTILINE_COMMENT_END);
+                continue;
+            }
+            if (rawLine.startsWith(MULTILINE_COMMENT_START)) {
+                insideBlockComment = true;
+                continue;
+            }
+
+            // discard a trailing comment, then leading and trailing whitespace
+            final int commentStart = rawLine.indexOf(COMMENT);
+            final String line = (commentStart < 0 ? rawLine : rawLine.substring(0, commentStart)).trim();
+            if (line.isEmpty()) {
+                continue; // nothing left to parse on this line
+            }
+
+            if (line.contains("=")) {
+                // folding statement
+                final String[] sides = line.split("=");
+                if (sides.length != 2) {
+                    throw new IllegalArgumentException("Malformed folding statement split into " + sides.length +
+                            " parts: " + rawLine + " in " + location);
+                }
+                final String from = sides[0];
+                final String to = sides[1];
+                if (from.length() != 1 || to.length() != 1) {
+                    throw new IllegalArgumentException("Malformed folding statement - " +
+                            "patterns are not single characters: " + rawLine + " in " + location);
+                }
+                asciiFoldings.put(from.charAt(0), to.charAt(0));
+                continue;
+            }
+
+            // rule statement
+            final String[] tokens = line.split("\\s+");
+            if (tokens.length != 4) {
+                throw new IllegalArgumentException("Malformed rule statement split into " + tokens.length +
+                        " parts: " + rawLine + " in " + location);
+            }
+            try {
+                final Rule rule = new Rule(stripQuotes(tokens[0]), stripQuotes(tokens[1]), stripQuotes(tokens[2]),
+                        stripQuotes(tokens[3]));
+                ruleMapping.computeIfAbsent(rule.pattern.charAt(0), key -> new ArrayList<>()).add(rule);
+            } catch (final IllegalArgumentException e) {
+                throw new IllegalStateException(
+                        "Problem parsing line '" + lineNumber + "' in " + location, e);
+            }
+        }
+    }
+
+    /**
+     * Removes one leading and one trailing double quote from a token, each only if present.
+     *
+     * @param str
+     *            the token to strip
+     * @return the token without its enclosing quotes
+     */
+    private static String stripQuotes(final String str) {
+        final String withoutLeading = str.startsWith(DOUBLE_QUOTE) ? str.substring(1) : str;
+        // the trailing quote is looked for in what remains after the leading quote has been removed
+        return withoutLeading.endsWith(DOUBLE_QUOTE)
+                ? withoutLeading.substring(0, withoutLeading.length() - 1)
+                : withoutLeading;
+    }
+
+ /** Whether to use ASCII folding prior to encoding. */
+ private final boolean folding;
+
+ /**
+ * Creates a new instance with ASCII-folding enabled.
+ * <p>
+ * Equivalent to {@code new DaitchMokotoffSoundex(true)}.
+ * </p>
+ */
+ public DaitchMokotoffSoundex() {
+ this(true);
+ }
+
+ /**
+ * Creates a new instance.
+ * <p>
+ * With ASCII-folding enabled, certain accented characters will be transformed to equivalent ASCII characters, e.g.
+ * è -&gt; e.
+ * </p>
+ *
+ * @param folding
+ * if ASCII-folding shall be performed before encoding
+ */
+ public DaitchMokotoffSoundex(final boolean folding) {
+ this.folding = folding;
+ }
+
+    /**
+     * Normalizes the input string before the actual soundex transformation.
+     * <p>
+     * Whitespace characters are dropped, every remaining character is converted to lower case and, if folding is
+     * enabled, replaced by its folded counterpart when a folding rule exists for it.
+     * </p>
+     *
+     * @param input
+     *            the input string to cleanup
+     * @return a cleaned up string
+     */
+    private String cleanup(final String input) {
+        final StringBuilder cleaned = new StringBuilder();
+        for (int i = 0; i < input.length(); i++) {
+            final char raw = input.charAt(i);
+            if (!Character.isWhitespace(raw)) {
+                final char lower = Character.toLowerCase(raw);
+                // folding rules are looked up with the lower-cased character
+                final Character folded = folding ? FOLDINGS.get(lower) : null;
+                cleaned.append(folded != null ? folded.charValue() : lower);
+            }
+        }
+        return cleaned.toString();
+    }
+
+    /**
+     * Encodes an Object using the Daitch-Mokotoff soundex algorithm without branching.
+     * <p>
+     * This method is provided in order to satisfy the requirements of the Encoder interface. Only {@code String}
+     * arguments are accepted; they are handed on to {@link #encode(String)}.
+     * </p>
+     *
+     * @see #soundex(String)
+     *
+     * @param obj
+     *            Object to encode
+     * @return An object (of type java.lang.String) containing the DM soundex code, which corresponds to the String
+     *         supplied.
+     * @throws EncoderException
+     *             if the parameter supplied is not of type java.lang.String, which includes {@code null}
+     */
+    @Override
+    public Object encode(final Object obj) throws EncoderException {
+        if (obj instanceof String) {
+            return encode((String) obj);
+        }
+        throw new EncoderException(
+                "Parameter supplied to DaitchMokotoffSoundex encode is not of type java.lang.String");
+    }
+
+    /**
+     * Encodes a String using the Daitch-Mokotoff soundex algorithm without branching.
+     * <p>
+     * Only the first code is returned; use {@link #soundex(String)} to obtain all codes.
+     * </p>
+     *
+     * @see #soundex(String)
+     *
+     * @param source
+     *            A String object to encode
+     * @return A DM Soundex code corresponding to the String supplied, or {@code null} if {@code source} is
+     *         {@code null}
+     */
+    @Override
+    public String encode(final String source) {
+        // without branching the result array holds exactly one code
+        return source == null ? null : soundex(source, false)[0];
+    }
+
+    /**
+     * Encodes a String using the Daitch-Mokotoff soundex algorithm with branching.
+     * <p>
+     * In case a string is encoded into multiple codes (see branching rules), the result will contain all codes,
+     * separated by '|'.
+     * </p>
+     * <p>
+     * Example: the name "AUERBACH" is encoded as both
+     * </p>
+     * <ul>
+     * <li>097400</li>
+     * <li>097500</li>
+     * </ul>
+     * <p>
+     * Thus the result will be "097400|097500".
+     * </p>
+     * <p>
+     * Characters for which no rule exists are skipped.
+     * </p>
+     *
+     * @param source
+     *            A String object to encode
+     * @return A string containing a set of DM Soundex codes corresponding to the String supplied, or {@code null} if
+     *         {@code source} is {@code null}
+     */
+    public String soundex(final String source) {
+        final String[] branches = soundex(source, true);
+        // The private overload answers null for a null source. Pass that on, as encode(String) does, instead of
+        // failing with a NullPointerException while joining the codes.
+        return branches == null ? null : String.join("|", branches);
+    }
+
+ /**
+ * Performs the actual DM Soundex algorithm on the input string.
+ * <p>
+ * The input is first passed through {@code cleanup(String)} and then scanned from left to right. At each position
+ * the first rule whose pattern matches the remaining input is applied, and the scan skips past the matched pattern.
+ * Characters for which no rule list exists are skipped.
+ * </p>
+ *
+ * @param source
+ * A String object to encode
+ * @param branching
+ * If branching shall be performed
+ * @return A string array containing all DM Soundex codes corresponding to the String supplied depending on the
+ * selected branching mode, or {@code null} if {@code source} is {@code null}
+ */
+ private String[] soundex(final String source, final boolean branching) {
+ if (source == null) {
+ return null;
+ }
+
+ final String input = cleanup(source);
+
+ // Start with a single empty branch. The set keeps insertion order and, because Branch equality is based on
+ // the code text, holds each distinct code only once.
+ final Set<Branch> currentBranches = new LinkedHashSet<>();
+ currentBranches.add(new Branch());
+
+ // '\0' means that no character with a rule list has been processed yet
+ char lastChar = '\0';
+ for (int index = 0; index < input.length(); index++) {
+ final char ch = input.charAt(index);
+
+ // ignore whitespace inside a name (cleanup() has already removed whitespace, so this is only a safeguard)
+ if (Character.isWhitespace(ch)) {
+ continue;
+ }
+
+ final String inputContext = input.substring(index);
+ final List<Rule> rules = RULES.get(ch);
+ if (rules == null) {
+ // no rule starts with this character: skip it without updating lastChar
+ continue;
+ }
+
+ // use an EMPTY_LIST to avoid false positive warnings wrt potential null pointer access
+ // (the immutable empty list is never written to: every write below is guarded by 'branching')
+ final List<Branch> nextBranches = branching ? new ArrayList<>() :
Collections.<Branch>emptyList();
+
+ // the static initializer sorted each rule list by descending pattern length, so the first match found
+ // here is the one with the longest pattern
+ for (final Rule rule : rules) {
+ if (rule.matches(inputContext)) {
+ if (branching) {
+ nextBranches.clear();
+ }
+ // the at-start replacements are used as long as lastChar is still '\0'
+ final String[] replacements =
rule.getReplacements(inputContext, lastChar == '\0');
+ final boolean branchingRequired = replacements.length > 1
&& branching;
+
+ for (final Branch branch : currentBranches) {
+ for (final String nextReplacement : replacements) {
+ // if we have multiple replacements, always create a new branch
+ // (otherwise the existing branch is extended in place)
+ final Branch nextBranch = branchingRequired ?
branch.createBranch() : branch;
+
+ // special rule: occurrences of mn or nm are treated differently
+ // (the replacement is appended even if the previous replacement ends with it)
+ final boolean force = (lastChar == 'm' && ch ==
'n') || (lastChar == 'n' && ch == 'm');
+
+ nextBranch.processNextReplacement(nextReplacement,
force);
+
+ if (branching) {
+ nextBranches.add(nextBranch);
+ } else {
+ // without branching only the first replacement is used
+ break;
+ }
+ }
+ }
+
+ if (branching) {
+ // replace the current branches by the extended ones; equal codes collapse into one entry
+ currentBranches.clear();
+ currentBranches.addAll(nextBranches);
+ }
+ // skip the rest of the matched pattern; the loop increment accounts for its first character
+ index += rule.getPatternLength() - 1;
+ break;
+ }
+ }
+
+ // note: this is the first character of the text just examined, not the last character of the pattern
+ lastChar = ch;
+ }
+
+ // pad every code with '0' up to the full code length and collect the codes in branch order
+ final String[] result = new String[currentBranches.size()];
+ int index = 0;
+ for (final Branch branch : currentBranches) {
+ branch.finish();
+ result[index++] = branch.toString();
+ }
+
+ return result;
+ }
+}
diff --git
a/src/test/java/org/apache/commons/codec/language/ColognePhoneticTest.java
b/src/test/java/org/apache/commons/codec/language/ColognePhoneticTest.java
index 737d2c9..76e257a 100644
--- a/src/test/java/org/apache/commons/codec/language/ColognePhoneticTest.java
+++ b/src/test/java/org/apache/commons/codec/language/ColognePhoneticTest.java
@@ -1,252 +1,252 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.commons.codec.language;
-
-import java.util.HashSet;
-import java.util.Locale;
-import java.util.Set;
-
-import org.apache.commons.codec.EncoderException;
-import org.apache.commons.codec.StringEncoderAbstractTest;
-import org.junit.AfterClass;
-import org.junit.Assert;
-import org.junit.Test;
-
-/**
- * Tests the <code>ColognePhonetic</code> class.
- *
- * <p>Keep this file in UTF-8 encoding for proper Javadoc processing.</p>
- *
- */
-public class ColognePhoneticTest extends StringEncoderAbstractTest<ColognePhonetic> {
-
- private static final Set<String> TESTSET = new HashSet<String>();
-
- private static boolean hasTestCase(String re) {
- for(String s : TESTSET) {
- if (s.matches(re)) {
- return true;
- }
- }
- return false;
- }
-
- // Character sequences to be tested by the code
- private static final String MATCHES[] = {
- ".*[AEIOUJY].*", // A, E, I, J, O, U, Y
- ".*H.*", // H
- ".*B.*", // B
- ".*P[^H].*", // P not before H
- ".*[DT][^CSZ].*", // D,T not before C,S,Z
- ".*[FVW].*", // F,V,W
- ".*PH.*", // P before H
- ".*[GKQ].*", // G,K,Q
- "C[AHKLOQRUX].*", // Initial C before A, H, K, L, O, Q, R, U, X
- ".*[^SZ]C[AHKLOQRUX].*", // C before A, H, K, L, O, Q, R, U, X but not after S, Z
- ".*[^CKQ]X.*", // X not after C,K,Q
- ".*L.*", // L
- ".*[MN].*", // M,N
- ".*R.*", // R
- ".*[SZ].*", // S,Z
- ".*[SZ]C.*", // C after S,Z
- "C[^AHKLOQRUX].*", // Initial C except before A, H, K, L, O, Q, R, U, X
- ".+C[^AHKLOQRUX].*", // C except before A, H, K, L, O, Q, R, U, X
- ".*[DT][CSZ].*", // D,T before C,S,Z
- ".*[CKQ]X.*", // X after C,K,Q
- };
-
- @AfterClass
- // Check that all possible input sequence conditions are represented
- public static void finishTests() {
- int errors = 0;
- for(String m : MATCHES) {
- if (!hasTestCase(m)) {
- System.out.println(m + " has no test case");
- errors++;
- }
- }
- Assert.assertEquals("Not expecting any missing test cases", 0, errors);
- }
-
- @Override
- // Capture test strings for later checking
- public void checkEncoding(String expected, String source) throws EncoderException {
- // Note that the German letter Eszett is converted to SS by toUpperCase, so we don't need to replace it
- TESTSET.add(source.toUpperCase(Locale.GERMAN).replace('Ä', 'A').replace('Ö', 'O').replace('Ü', 'U'));
- super.checkEncoding(expected, source);
- }
-
- @Override
- protected ColognePhonetic createStringEncoder() {
- return new ColognePhonetic();
- }
-
- @Test(expected=org.junit.ComparisonFailure.class)
- // Ensure that override still allows tests to work
- public void testCanFail() throws EncoderException {
- this.checkEncoding("/", "Fehler");
- }
-
- @Test
- public void testAabjoe() throws EncoderException {
- this.checkEncoding("01", "Aabjoe");
- }
-
- @Test
- public void testAaclan() throws EncoderException {
- this.checkEncoding("0856", "Aaclan");
- }
-
- /**
- * Tests [CODEC-122]
- *
- * @throws EncoderException
- */
- @Test
- public void testAychlmajrForCodec122() throws EncoderException {
- this.checkEncoding("04567", "Aychlmajr");
- }
-
- @Test
- public void testEdgeCases() throws EncoderException {
- final String[][] data = {
- {"a", "0"},
- {"e", "0"},
- {"i", "0"},
- {"o", "0"},
- {"u", "0"},
- {"\u00E4", "0"}, // a-umlaut
- {"\u00F6", "0"}, // o-umlaut
- {"\u00FC", "0"}, // u-umlaut
- {"\u00DF", "8"}, // small sharp s
- {"aa", "0"},
- {"ha", "0"},
- {"h", ""},
- {"aha", "0"},
- {"b", "1"},
- {"p", "1"},
- {"ph", "3"},
- {"f", "3"},
- {"v", "3"},
- {"w", "3"},
- {"g", "4"},
- {"k", "4"},
- {"q", "4"},
- {"x", "48"},
- {"ax", "048"},
- {"cx", "48"},
- {"l", "5"},
- {"cl", "45"},
- {"acl", "085"},
- {"mn", "6"},
- {"{mn}","6"}, // test chars above Z
- {"r", "7"}};
- this.checkEncodings(data);
- }
-
- @Test
- public void testExamples() throws EncoderException {
- final String[][] data = {
- {"m\u00DCller", "657"}, // mÜller - why upper case U-umlaut?
- {"m\u00FCller", "657"}, // müller - add equivalent lower-case
- {"schmidt", "862"},
- {"schneider", "8627"},
- {"fischer", "387"},
- {"weber", "317"},
- {"wagner", "3467"},
- {"becker", "147"},
- {"hoffmann", "0366"},
- {"sch\u00C4fer", "837"}, // schÄfer - why upper case A-umlaut ?
- {"sch\u00e4fer", "837"}, // schäfer - add equivalent lower-case
- {"Breschnew", "17863"},
- {"Wikipedia", "3412"},
- {"peter", "127"},
- {"pharma", "376"},
- {"m\u00f6nchengladbach", "664645214"}, // mönchengladbach
- {"deutsch", "28"},
- {"deutz", "28"},
- {"hamburg", "06174"},
- {"hannover", "0637"},
- {"christstollen", "478256"},
- {"Xanthippe", "48621"},
- {"Zacharias", "8478"},
- {"Holzbau", "0581"},
- {"matsch", "68"},
- {"matz", "68"},
- {"Arbeitsamt", "071862"},
- {"Eberhard", "01772"},
- {"Eberhardt", "01772"},
- {"Celsius", "8588"},
- {"Ace", "08"},
- {"heithabu", "021"}};
- this.checkEncodings(data);
- }
-
- @Test
- public void testHyphen() throws EncoderException {
- final String[][] data = {{"bergisch-gladbach", "174845214"},
- {"M\u00fcller-L\u00fcdenscheidt", "65752682"}}; // Müller-Lüdenscheidt
- this.checkEncodings(data);
- }
-
- @Test
- public void testIsEncodeEquals() {
- //@formatter:off
- final String[][] data = {
- {"Muller", "M\u00fcller"}, // Müller
- {"Meyer", "Mayr"},
- {"house", "house"},
- {"House", "house"},
- {"Haus", "house"},
- {"ganz", "Gans"},
- {"ganz", "G\u00e4nse"}, // Gänse
- {"Miyagi", "Miyako"}};
- //@formatter:on
- for (final String[] element : data) {
- final boolean encodeEqual = this.getStringEncoder().isEncodeEqual(element[1], element[0]);
- Assert.assertTrue(element[1] + " != " + element[0], encodeEqual);
- }
- }
-
- @Test
- public void testVariationsMella() throws EncoderException {
- final String data[] = {"mella", "milah", "moulla", "mellah", "muehle", "mule"};
- this.checkEncodingVariations("65", data);
- }
-
- @Test
- public void testVariationsMeyer() throws EncoderException {
- final String data[] = {"Meier", "Maier", "Mair", "Meyer", "Meyr", "Mejer", "Major"};
- this.checkEncodingVariations("67", data);
- }
-
- @Test
- public void testSpecialCharsBetweenSameLetters() throws EncoderException {
- final String data[] = {"Test test", "Testtest", "Test-test", "TesT#Test", "TesT?test"};
- this.checkEncodingVariations("28282", data);
- }
-
- // Allow command-line testing
- public static void main(String args[]) {
- ColognePhonetic coder = new ColognePhonetic();
- for(String arg : args) {
- String code = coder.encode(arg);
- System.out.println("'" + arg + "' = '" + code + "'");
- }
- }
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.commons.codec.language;
+
+import java.util.HashSet;
+import java.util.Locale;
+import java.util.Set;
+
+import org.apache.commons.codec.EncoderException;
+import org.apache.commons.codec.StringEncoderAbstractTest;
+import org.junit.AfterClass;
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * Tests the <code>ColognePhonetic</code> class.
+ *
+ * <p>Keep this file in UTF-8 encoding for proper Javadoc processing.</p>
+ *
+ */
+public class ColognePhoneticTest extends StringEncoderAbstractTest<ColognePhonetic> {
+
+ private static final Set<String> TESTSET = new HashSet<>();
+
+ private static boolean hasTestCase(String re) {
+ for(String s : TESTSET) {
+ if (s.matches(re)) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ // Character sequences to be tested by the code
+ private static final String MATCHES[] = {
+ ".*[AEIOUJY].*", // A, E, I, J, O, U, Y
+ ".*H.*", // H
+ ".*B.*", // B
+ ".*P[^H].*", // P not before H
+ ".*[DT][^CSZ].*", // D,T not before C,S,Z
+ ".*[FVW].*", // F,V,W
+ ".*PH.*", // P before H
+ ".*[GKQ].*", // G,K,Q
+ "C[AHKLOQRUX].*", // Initial C before A, H, K, L, O, Q, R, U, X
+ ".*[^SZ]C[AHKLOQRUX].*", // C before A, H, K, L, O, Q, R, U, X but not after S, Z
+ ".*[^CKQ]X.*", // X not after C,K,Q
+ ".*L.*", // L
+ ".*[MN].*", // M,N
+ ".*R.*", // R
+ ".*[SZ].*", // S,Z
+ ".*[SZ]C.*", // C after S,Z
+ "C[^AHKLOQRUX].*", // Initial C except before A, H, K, L, O, Q, R, U, X
+ ".+C[^AHKLOQRUX].*", // C except before A, H, K, L, O, Q, R, U, X
+ ".*[DT][CSZ].*", // D,T before C,S,Z
+ ".*[CKQ]X.*", // X after C,K,Q
+ };
+
+ @AfterClass
+ // Check that all possible input sequence conditions are represented
+ public static void finishTests() {
+ int errors = 0;
+ for(String m : MATCHES) {
+ if (!hasTestCase(m)) {
+ System.out.println(m + " has no test case");
+ errors++;
+ }
+ }
+ Assert.assertEquals("Not expecting any missing test cases", 0, errors);
+ }
+
+ @Override
+ // Capture test strings for later checking
+ public void checkEncoding(String expected, String source) throws EncoderException {
+ // Note that the German letter Eszett is converted to SS by toUpperCase, so we don't need to replace it
+ TESTSET.add(source.toUpperCase(Locale.GERMAN).replace('Ä', 'A').replace('Ö', 'O').replace('Ü', 'U'));
+ super.checkEncoding(expected, source);
+ }
+
+ @Override
+ protected ColognePhonetic createStringEncoder() {
+ return new ColognePhonetic();
+ }
+
+ @Test(expected=org.junit.ComparisonFailure.class)
+ // Ensure that override still allows tests to work
+ public void testCanFail() throws EncoderException {
+ this.checkEncoding("/", "Fehler");
+ }
+
+ @Test
+ public void testAabjoe() throws EncoderException {
+ this.checkEncoding("01", "Aabjoe");
+ }
+
+ @Test
+ public void testAaclan() throws EncoderException {
+ this.checkEncoding("0856", "Aaclan");
+ }
+
+ /**
+ * Tests [CODEC-122]
+ *
+ * @throws EncoderException
+ */
+ @Test
+ public void testAychlmajrForCodec122() throws EncoderException {
+ this.checkEncoding("04567", "Aychlmajr");
+ }
+
+ @Test
+ public void testEdgeCases() throws EncoderException {
+ final String[][] data = {
+ {"a", "0"},
+ {"e", "0"},
+ {"i", "0"},
+ {"o", "0"},
+ {"u", "0"},
+ {"\u00E4", "0"}, // a-umlaut
+ {"\u00F6", "0"}, // o-umlaut
+ {"\u00FC", "0"}, // u-umlaut
+ {"\u00DF", "8"}, // small sharp s
+ {"aa", "0"},
+ {"ha", "0"},
+ {"h", ""},
+ {"aha", "0"},
+ {"b", "1"},
+ {"p", "1"},
+ {"ph", "3"},
+ {"f", "3"},
+ {"v", "3"},
+ {"w", "3"},
+ {"g", "4"},
+ {"k", "4"},
+ {"q", "4"},
+ {"x", "48"},
+ {"ax", "048"},
+ {"cx", "48"},
+ {"l", "5"},
+ {"cl", "45"},
+ {"acl", "085"},
+ {"mn", "6"},
+ {"{mn}","6"}, // test chars above Z
+ {"r", "7"}};
+ this.checkEncodings(data);
+ }
+
+ @Test
+ public void testExamples() throws EncoderException {
+ final String[][] data = {
+ {"m\u00DCller", "657"}, // mÜller - why upper case U-umlaut?
+ {"m\u00FCller", "657"}, // müller - add equivalent lower-case
+ {"schmidt", "862"},
+ {"schneider", "8627"},
+ {"fischer", "387"},
+ {"weber", "317"},
+ {"wagner", "3467"},
+ {"becker", "147"},
+ {"hoffmann", "0366"},
+ {"sch\u00C4fer", "837"}, // schÄfer - why upper case A-umlaut ?
+ {"sch\u00e4fer", "837"}, // schäfer - add equivalent lower-case
+ {"Breschnew", "17863"},
+ {"Wikipedia", "3412"},
+ {"peter", "127"},
+ {"pharma", "376"},
+ {"m\u00f6nchengladbach", "664645214"}, // mönchengladbach
+ {"deutsch", "28"},
+ {"deutz", "28"},
+ {"hamburg", "06174"},
+ {"hannover", "0637"},
+ {"christstollen", "478256"},
+ {"Xanthippe", "48621"},
+ {"Zacharias", "8478"},
+ {"Holzbau", "0581"},
+ {"matsch", "68"},
+ {"matz", "68"},
+ {"Arbeitsamt", "071862"},
+ {"Eberhard", "01772"},
+ {"Eberhardt", "01772"},
+ {"Celsius", "8588"},
+ {"Ace", "08"},
+ {"heithabu", "021"}};
+ this.checkEncodings(data);
+ }
+
+ @Test
+ public void testHyphen() throws EncoderException {
+ final String[][] data = {{"bergisch-gladbach", "174845214"},
+ {"M\u00fcller-L\u00fcdenscheidt", "65752682"}}; // Müller-Lüdenscheidt
+ this.checkEncodings(data);
+ }
+
+ @Test
+ public void testIsEncodeEquals() {
+ //@formatter:off
+ final String[][] data = {
+ {"Muller", "M\u00fcller"}, // Müller
+ {"Meyer", "Mayr"},
+ {"house", "house"},
+ {"House", "house"},
+ {"Haus", "house"},
+ {"ganz", "Gans"},
+ {"ganz", "G\u00e4nse"}, // Gänse
+ {"Miyagi", "Miyako"}};
+ //@formatter:on
+ for (final String[] element : data) {
+ final boolean encodeEqual = this.getStringEncoder().isEncodeEqual(element[1], element[0]);
+ Assert.assertTrue(element[1] + " != " + element[0], encodeEqual);
+ }
+ }
+
+ @Test
+ public void testVariationsMella() throws EncoderException {
+ final String data[] = {"mella", "milah", "moulla", "mellah", "muehle", "mule"};
+ this.checkEncodingVariations("65", data);
+ }
+
+ @Test
+ public void testVariationsMeyer() throws EncoderException {
+ final String data[] = {"Meier", "Maier", "Mair", "Meyer", "Meyr", "Mejer", "Major"};
+ this.checkEncodingVariations("67", data);
+ }
+
+ @Test
+ public void testSpecialCharsBetweenSameLetters() throws EncoderException {
+ final String data[] = {"Test test", "Testtest", "Test-test", "TesT#Test", "TesT?test"};
+ this.checkEncodingVariations("28282", data);
+ }
+
+ // Allow command-line testing
+ public static void main(String args[]) {
+ ColognePhonetic coder = new ColognePhonetic();
+ for(String arg : args) {
+ String code = coder.encode(arg);
+ System.out.println("'" + arg + "' = '" + code + "'");
+ }
+ }
+}