AnalyzingQueryParser can't work with leading wildcards. -------------------------------------------------------
Key: LUCENE-949 URL: https://issues.apache.org/jira/browse/LUCENE-949 Project: Lucene - Java Issue Type: Bug Components: QueryParser Affects Versions: 2.2 Reporter: Stefan Klein The getWildcardQuery method in AnalyzingQueryParser.java needs the following changes to accept leading wildcards: protected Query getWildcardQuery(String field, String termStr) throws ParseException { String useTermStr = termStr; String leadingWildcard = null; if ("*".equals(field)) { if ("*".equals(useTermStr)) return new MatchAllDocsQuery(); } boolean hasLeadingWildcard = (useTermStr.startsWith("*") || useTermStr.startsWith("?")) ? true : false; if (!getAllowLeadingWildcard() && hasLeadingWildcard) throw new ParseException("'*' or '?' not allowed as first character in WildcardQuery"); if (getLowercaseExpandedTerms()) { useTermStr = useTermStr.toLowerCase(); } if (hasLeadingWildcard) { leadingWildcard = useTermStr.substring(0, 1); useTermStr = useTermStr.substring(1); } List tlist = new ArrayList(); List wlist = new ArrayList(); /* * somewhat a hack: find/store wildcard chars in order to put them back * after analyzing */ boolean isWithinToken = (!useTermStr.startsWith("?") && !useTermStr.startsWith("*")); isWithinToken = true; StringBuffer tmpBuffer = new StringBuffer(); char[] chars = useTermStr.toCharArray(); for (int i = 0; i < useTermStr.length(); i++) { if (chars[i] == '?' 
|| chars[i] == '*') { if (isWithinToken) { tlist.add(tmpBuffer.toString()); tmpBuffer.setLength(0); } isWithinToken = false; } else { if (!isWithinToken) { wlist.add(tmpBuffer.toString()); tmpBuffer.setLength(0); } isWithinToken = true; } tmpBuffer.append(chars[i]); } if (isWithinToken) { tlist.add(tmpBuffer.toString()); } else { wlist.add(tmpBuffer.toString()); } // get Analyzer from superclass and tokenize the term TokenStream source = getAnalyzer().tokenStream(field, new StringReader(useTermStr)); org.apache.lucene.analysis.Token t; int countTokens = 0; while (true) { try { t = source.next(); } catch (IOException e) { t = null; } if (t == null) { break; } if (!"".equals(t.termText())) { try { tlist.set(countTokens++, t.termText()); } catch (IndexOutOfBoundsException ioobe) { countTokens = -1; } } } try { source.close(); } catch (IOException e) { // ignore } if (countTokens != tlist.size()) { /* * this means that the analyzer used either added or consumed * (common for a stemmer) tokens, and we can't build a WildcardQuery */ throw new ParseException("Cannot build WildcardQuery with analyzer " + getAnalyzer().getClass() + " - tokens added or lost"); } if (tlist.size() == 0) { return null; } else if (tlist.size() == 1) { if (wlist.size() == 1) { /* * if wlist contains one wildcard, it must be at the end, * because: 1) wildcards at 1st position of a term by * QueryParser where truncated 2) if wildcard was *not* in end, * there would be *two* or more tokens */ StringBuffer sb = new StringBuffer(); if (hasLeadingWildcard) { // adding leadingWildcard sb.append(leadingWildcard); } sb.append((String) tlist.get(0)); sb.append(wlist.get(0).toString()); return super.getWildcardQuery(field, sb.toString()); } else if (wlist.size() == 0 && hasLeadingWildcard) { /* * if wlist contains no wildcard, it must be at 1st position */ StringBuffer sb = new StringBuffer(); if (hasLeadingWildcard) { // adding leadingWildcard sb.append(leadingWildcard); } sb.append((String) tlist.get(0)); 
sb.append(wlist.get(0).toString()); return super.getWildcardQuery(field, sb.toString()); } else { /* * we should never get here! if so, this method was called with * a termStr containing no wildcard ... */ throw new IllegalArgumentException("getWildcardQuery called without wildcard"); } } else { /* * the term was tokenized, let's rebuild to one token with wildcards * put back in postion */ StringBuffer sb = new StringBuffer(); if (hasLeadingWildcard) { // adding leadingWildcard sb.append(leadingWildcard); } for (int i = 0; i < tlist.size(); i++) { sb.append((String) tlist.get(i)); if (wlist != null && wlist.size() > i) { sb.append((String) wlist.get(i)); } } return super.getWildcardQuery(field, sb.toString()); } } -- This message is automatically generated by JIRA. - You can reply to this email to add a comment to the issue online. --------------------------------------------------------------------- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]