There are some typos in the spelling of "violates". Gary
On Mar 8, 2012, at 15:57, "t...@apache.org" <t...@apache.org> wrote: > Author: tn > Date: Thu Mar 8 20:56:35 2012 > New Revision: 1298576 > > URL: http://svn.apache.org/viewvc?rev=1298576&view=rev > Log: > [CODEC-63] Added explanation for different results to dropby.com, Raised CC > to 100/100 > > Modified: > > commons/proper/codec/trunk/src/test/java/org/apache/commons/codec/language/NysiisTest.java > > Modified: > commons/proper/codec/trunk/src/test/java/org/apache/commons/codec/language/NysiisTest.java > URL: > http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/test/java/org/apache/commons/codec/language/NysiisTest.java?rev=1298576&r1=1298575&r2=1298576&view=diff > =============================================================================--- > > commons/proper/codec/trunk/src/test/java/org/apache/commons/codec/language/NysiisTest.java > (original) > +++ > commons/proper/codec/trunk/src/test/java/org/apache/commons/codec/language/NysiisTest.java > Thu Mar 8 20:56:35 2012 > @@ -49,6 +49,15 @@ public class NysiisTest extends StringEn > } > > @Test > + public void testTrueVariant() { > + Nysiis encoder = new Nysiis(true); > + > + String encoded = encoder.encode("WESTERLUND"); > + Assert.assertTrue(encoded.length() <= 6); > + Assert.assertEquals("WASTAR", encoded); > + } > + > + @Test > public void testBran() throws EncoderException { > encodeAll(new String[] { "Brian", "Brown", "Brun" }, "BRAN"); > } > @@ -71,6 +80,17 @@ public class NysiisTest extends StringEn > } > > @Test > + public void testSpecialBranches() throws EncoderException { > + this.encodeAll(new String[] { "Kobwick" }, "CABWAC"); > + this.encodeAll(new String[] { "Kocher" }, "CACAR"); > + this.encodeAll(new String[] { "Fesca" }, "FASC"); > + this.encodeAll(new String[] { "Shom" }, "SAN"); > + this.encodeAll(new String[] { "Ohlo" }, "OL"); > + this.encodeAll(new String[] { "Uhu" }, "UH"); > + this.encodeAll(new String[] { "Um" }, "UN"); > + } > + > + @Test > public void testDropBy() throws 
EncoderException { > List<String[]> testValues = Arrays.asList( > @@ -112,16 +132,62 @@ public class NysiisTest extends StringEn > */ > @Test > public void testDropBy2() throws EncoderException { > + // Explanation of differences between this implementation and the > one at dropby.com. > + // > + // Algorithm (taken from www.dropby.com/NYSIIS.html): > + // > + // 1. Transcode first characters of name: > + // MAC » MCC > + // KN » NN > + // K » C > + // PH » FF > + // PF » FF > + // SCH » SSS > + // > + // 2. Transcode last characters of name: > + // EE, IE » Y > + // DT,RT,RD,NT,ND » D > + // > + // 3. First character of key = first character of name. > + // > + // 4. Transcode remaining characters by following these rules, > incrementing by one character each time: > + // 4a. EV » AF else A,E,I,O,U » A > + // 4b. Q » G > + // 4c. Z » S > + // 4d. M » N > + // 4e. KN » N else K » C > + // 4f. SCH » SSS > + // 4g. PH » FF > + // 4h. H » If previous or next is nonvowel, previous > + // 4i. W » If previous is vowel, previous > + // 4j. Add current to key if current != last key character > + // > + // 5. If last character is S, remove it > + // 6. If last characters are AY, replace with Y > + // 7. If last character is A, remove it > + // 8. Collapse all strings of repeated characters > + // 9. Add original first character of name as first character of key > + > List<String[]> testValues = Arrays.asList( > // > http://www.dropby.com/indexLF.html?content=/NYSIIS.html > // 1. 
Transcode first characters of name > new String[] { "MACINTOSH", "MCANT" }, > - //new String[] { "KNUTH", "NNATH" }, // Original: > NNAT; modified: NATH > - //new String[] { "KOEHN", "C" }, > - //new String[] { "PHILLIPSON", "FFALAP" }, > - //new String[] { "PFEISTER", "FFASTA" }, > - //new String[] { "SCHOENHOEFT", "SSANAF" }, > + // violates 4j: the second N should not be added, as > the first > + // key char is already a N > + new String[] { "KNUTH", "NAT" }, // Original: NNAT; > modified: NATH > + // O and E are transcoded to A because of rule 4a > + // H also to A because of rule 4h > + // the N gets mysteriously lost, maybe because of a > wrongly implemented rule 4h > + // that skips the next char in such a case? > + // the remaining A is removed because of rule 7 > + new String[] { "KOEHN", "CAN" }, // Original: C > + // violates 4j: see also KNUTH > + new String[] { "PHILLIPSON", "FALAPSAN" }, // > Original: FFALAP[SAN] > + // violates 4j: see also KNUTH > + new String[] { "PFEISTER", "FASTAR" }, // Original: > FFASTA[R] > + // violoates 4j: see also KNUTH > + new String[] { "SCHOENHOEFT", "SANAFT" }, // > Original: SSANAF[T] > // > http://www.dropby.com/indexLF.html?content=/NYSIIS.html > // 2.Transcode last characters of name: > new String[] { "MCKEE", "MCY" }, > @@ -139,14 +205,21 @@ public class NysiisTest extends StringEn > new String[] { "BOWMAN", "BANAN" }, > new String[] { "MCKNIGHT", "MCNAGT" }, > new String[] { "RICKERT", "RACAD" }, > - //new String[] { "DEUTSCH", "DATS" }, > + // violates 5: the last S is not removed > + // when comparing to DEUTS, which is phonetically > similar > + // the result it also DAT, which is correct for > DEUTSCH too imo > + new String[] { "DEUTSCH", "DAT" }, // Original: DATS > new String[] { "WESTPHAL", "WASTFAL" }, > - //new String[] { "SHRIVER", "SHRAVA" }, > - //new String[] { "KUHL", "C" }, > + // violates 4h: the H should be transcoded to S and > thus ignored as > + // the first key character is also S > + new 
String[] { "SHRIVER", "SRAVAR" }, // Original: > SHRAVA[R] > + // same as KOEHN, the L gets mysteriously lost, the > correct one > + new String[] { "KUHL", "CAL" }, // Original: C > new String[] { "RAWSON", "RASAN" }, > // If last character is S, remove it > new String[] { "JILES", "JAL" }, > - //new String[] { "CARRAWAY", "CARAY" }, > + // violates 6: if the last two characters are AY, > remove A > + new String[] { "CARRAWAY", "CARY" }, // Original: > CARAY > new String[] { "YAMADA", "YANAD" }); > > for (String[] arr : testValues) { > > --------------------------------------------------------------------- To unsubscribe, e-mail: dev-unsubscr...@commons.apache.org For additional commands, e-mail: dev-h...@commons.apache.org