why are we backporting new features to a bugfix branch? On Wed, May 30, 2012 at 1:07 AM, <chr...@apache.org> wrote: > Author: chrism > Date: Wed May 30 05:07:31 2012 > New Revision: 1344101 > > URL: http://svn.apache.org/viewvc?rev=1344101&view=rev > Log: > LUCENE-4079: Added support for aliaising (AF rules) to Hunspell > > Added: > > lucene/dev/branches/lucene_solr_3_6/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/hunspell/testCompressed.aff > - copied unchanged from r1344095, > lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/testCompressed.aff > > lucene/dev/branches/lucene_solr_3_6/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/hunspell/testCompressed.dic > - copied unchanged from r1344095, > lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/testCompressed.dic > Modified: > lucene/dev/branches/lucene_solr_3_6/lucene/contrib/CHANGES.txt > > lucene/dev/branches/lucene_solr_3_6/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/hunspell/HunspellDictionary.java > > lucene/dev/branches/lucene_solr_3_6/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/hunspell/HunspellDictionaryTest.java > > Modified: lucene/dev/branches/lucene_solr_3_6/lucene/contrib/CHANGES.txt > URL: > http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_3_6/lucene/contrib/CHANGES.txt?rev=1344101&r1=1344100&r2=1344101&view=diff > ============================================================================== > --- lucene/dev/branches/lucene_solr_3_6/lucene/contrib/CHANGES.txt (original) > +++ lucene/dev/branches/lucene_solr_3_6/lucene/contrib/CHANGES.txt Wed May 30 > 05:07:31 2012 > @@ -33,6 +33,11 @@ Bug Fixes > * LUCENE-4074: FST Sorter BufferSize creates a negative buffer size due to an > 32 bit signed integer overflow if BufferSize >= 2048MB is passed. 
> (Simon Willnauer) > + > +New Features > + > + * LUCENE-4079: Added support for aliasing (AF rules) in Hunspell > dictionaries > + (Ludovic Boutros via Chris Male) > > Tests > > > Modified: > lucene/dev/branches/lucene_solr_3_6/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/hunspell/HunspellDictionary.java > URL: > http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_3_6/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/hunspell/HunspellDictionary.java?rev=1344101&r1=1344100&r2=1344101&view=diff > ============================================================================== > --- > lucene/dev/branches/lucene_solr_3_6/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/hunspell/HunspellDictionary.java > (original) > +++ > lucene/dev/branches/lucene_solr_3_6/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/hunspell/HunspellDictionary.java > Wed May 30 05:07:31 2012 > @@ -37,6 +37,7 @@ public class HunspellDictionary { > > static final HunspellWord NOFLAGS = new HunspellWord(); > > + private static final String ALIAS_KEY = "AF"; > private static final String PREFIX_KEY = "PFX"; > private static final String SUFFIX_KEY = "SFX"; > private static final String FLAG_KEY = "FLAG"; > @@ -59,6 +60,9 @@ public class HunspellDictionary { > > private final Version version; > > + private String[] aliases; > + private int aliasCount = 0; > + > /** > * Creates a new HunspellDictionary containing the information read from > the provided InputStreams to hunspell affix > * and dictionary files > @@ -161,7 +165,9 @@ public class HunspellDictionary { > BufferedReader reader = new BufferedReader(new > InputStreamReader(affixStream, decoder)); > String line = null; > while ((line = reader.readLine()) != null) { > - if (line.startsWith(PREFIX_KEY)) { > + if (line.startsWith(ALIAS_KEY)) { > + parseAlias(line); > + } else if (line.startsWith(PREFIX_KEY)) { > parseAffix(prefixes, line, reader, 
PREFIX_CONDITION_REGEX_PATTERN); > } else if (line.startsWith(SUFFIX_KEY)) { > parseAffix(suffixes, line, reader, SUFFIX_CONDITION_REGEX_PATTERN); > @@ -206,7 +212,13 @@ public class HunspellDictionary { > > int flagSep = affixArg.lastIndexOf('/'); > if (flagSep != -1) { > - char appendFlags[] = > flagParsingStrategy.parseFlags(affixArg.substring(flagSep + 1)); > + String flagPart = affixArg.substring(flagSep + 1); > + > + if (aliasCount > 0) { > + flagPart = getAliasValue(Integer.parseInt(flagPart)); > + } > + > + char appendFlags[] = flagParsingStrategy.parseFlags(flagPart); > Arrays.sort(appendFlags); > affix.setAppendFlags(appendFlags); > affix.setAppend(affixArg.substring(0, flagSep)); > @@ -330,8 +342,12 @@ public class HunspellDictionary { > if (end == -1) > end = line.length(); > > + String flagPart = line.substring(flagSep + 1, end); > + if (aliasCount > 0) { > + flagPart = getAliasValue(Integer.parseInt(flagPart)); > + } > > - wordForm = new > HunspellWord(flagParsingStrategy.parseFlags(line.substring(flagSep + 1, > end))); > + wordForm = new > HunspellWord(flagParsingStrategy.parseFlags(flagPart)); > Arrays.sort(wordForm.getFlags()); > entry = line.substring(0, flagSep); > if(ignoreCase) { > @@ -352,6 +368,25 @@ public class HunspellDictionary { > return version; > } > > + private void parseAlias(String line) { > + String ruleArgs[] = line.split("\\s+"); > + if (aliases == null) { > + //first line should be the aliases count > + final int count = Integer.parseInt(ruleArgs[1]); > + aliases = new String[count]; > + } else { > + aliases[aliasCount++] = ruleArgs[1]; > + } > + } > + > + private String getAliasValue(int id) { > + try { > + return aliases[id - 1]; > + } catch (IndexOutOfBoundsException ex) { > + throw new IllegalArgumentException("Bad flag alias number:" + id, ex); > + } > + } > + > /** > * Abstraction of the process of parsing flags taken from the affix and dic > files > */ > > Modified: > 
lucene/dev/branches/lucene_solr_3_6/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/hunspell/HunspellDictionaryTest.java > URL: > http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_3_6/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/hunspell/HunspellDictionaryTest.java?rev=1344101&r1=1344100&r2=1344101&view=diff > ============================================================================== > --- > lucene/dev/branches/lucene_solr_3_6/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/hunspell/HunspellDictionaryTest.java > (original) > +++ > lucene/dev/branches/lucene_solr_3_6/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/hunspell/HunspellDictionaryTest.java > Wed May 30 05:07:31 2012 > @@ -39,4 +39,18 @@ public class HunspellDictionaryTest exte > affixStream.close(); > dictStream.close(); > } > + > + @Test > + public void testCompressedHunspellDictionary_loadDicAff() throws > IOException, ParseException { > + InputStream affixStream = > getClass().getResourceAsStream("testCompressed.aff"); > + InputStream dictStream = > getClass().getResourceAsStream("testCompressed.dic"); > + > + HunspellDictionary dictionary = new HunspellDictionary(affixStream, > dictStream, TEST_VERSION_CURRENT); > + assertEquals(3, dictionary.lookupSuffix(new char[]{'e'}, 0, 1).size()); > + assertEquals(1, dictionary.lookupPrefix(new char[]{'s'}, 0, 1).size()); > + assertEquals(1, dictionary.lookupWord(new char[]{'o', 'l', 'r'}, 0, > 3).size()); > + > + affixStream.close(); > + dictStream.close(); > + } > } > >
-- lucidimagination.com
---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscr...@lucene.apache.org
For additional commands, e-mail: dev-h...@lucene.apache.org