: Further to our discussions some time ago I've had some time to put : together an XML-based query parser with support for many "advanced" : query types not supported in the current Query parser. : : More details and code here: http://www.inperspective.com/lucene/LXQuery2.htm
So I *finally* got a chance to look at this in depth. In general, i think it looks great, but I have a few questions/comments... 1) The factory classes should have "removeBuilder" methods so people subclassing parsers can flat out remove support for a particular tag, not just replace it. 2) This DOM version definitely seems easier to follow/use then the SAX version (allthough thta could just be because i'm more familiar with DOM then SAX) .. but it still seems like an intermediate representation that wasn't org.w3c.dom.Element would be handy -- if for no other reason then so the methods in DOMUtils could be put directly in the "Element" class. 3) I'm still confused about how state information could/would be passed up or down the tree by builders. you mentioned something before about using ThreadLocal (which i'm not very familiar with) but i don't see any examples of that here. 4) The various getQuery (and getFilter and getSpanQuery) methods should use e.getTagName when throwing ParserExceptions about not finding what they're looking for, That way, if i write a parser that has... queryFactory.addBuilder("tf",new TermQueryObjectBuilder()); ... the ParserExceptions thrown by TermQueryObjectBuilder.getQuery when i forget to include a value="foo" can refrence the tag name i'm using (tf), and not the hardcoded constant "TermQuery" 5) it seems like a lot of redundency could be removed between the various builders by refactoring some more utility functions -- particularly in the error cases (see attached patch) -Hoss
Only in modified: build.xml Only in modified: lib diff -cr orig/src/org/apache/lucene/xmlparser/DOMUtils.java modified/src/org/apache/lucene/xmlparser/DOMUtils.java *** orig/src/org/apache/lucene/xmlparser/DOMUtils.java Wed Feb 22 21:57:54 2006 --- modified/src/org/apache/lucene/xmlparser/DOMUtils.java Sun Feb 26 17:16:41 2006 *************** *** 11,16 **** --- 11,74 ---- public class DOMUtils { + + public static Element getChildByTagOrFail(Element e, String name) + throws ParserException + { + Element kid = getChildByTagName(e,name); + if (null == kid) { + throw new ParserException(e.getTagName() + " missing \"" + + name + "\" child element"); + } + return kid; + } + + public static Element getFirstChildOrFail(Element e) + throws ParserException + { + Element kid = getFirstChildElement(e); + if (null == kid) { + throw new ParserException(e.getTagName() + + " does not contain a child element"); + } + return kid; + } + + public static String getAttributeOrFail(Element e, String name) + throws ParserException + { + String v = e.getAttribute(name); + if (null == v) { + throw new ParserException(e.getTagName() + " missing \"" + + name + "\" attribute"); + } + return v; + } + public static String getAttributeWithInheritanceOrFail(Element e, + String name) + throws ParserException + { + String v = getAttributeWithInheritance(e,name); + if (null == v) { + throw new ParserException(e.getTagName() + " missing \"" + + name + "\" attribute"); + } + return v; + } + public static String getNonBlankTextOrFail(Element e) + throws ParserException + { + String v = getText(e); + if (null != v) v = v.trim(); + if (null == v || 0 == v.length()) { + throw new ParserException(e.getTagName() + " has no text"); + } + return v; + } + + + + /* Convenience method where there is only one child Element of a given name */ public static Element getChildByTagName(Element e, String name) { diff -cr orig/src/org/apache/lucene/xmlparser/FilteredQueryBuilder.java modified/src/org/apache/lucene/xmlparser/FilteredQueryBuilder.java *** orig/src/org/apache/lucene/xmlparser/FilteredQueryBuilder.java Wed Feb 22 22:47:26 2006 --- modified/src/org/apache/lucene/xmlparser/FilteredQueryBuilder.java Sun Feb 26 16:51:43 2006 *************** *** 27,70 **** /* (non-Javadoc) * @see org.apache.lucene.xmlparser.QueryObjectBuilder#process(org.w3c.dom.Element) */ ! public Query getQuery(Element e) throws ParserException { ! Element filterElement=DOMUtils.getChildByTagName(e,"Filter"); ! if(filterElement==null) ! { ! throw new ParserException("FilteredQuery missing \"Filter\" child element"); ! } ! filterElement=DOMUtils.getFirstChildElement(filterElement); ! Filter f=null; ! if(filterElement!=null) ! { ! f=filterFactory.process(filterElement); ! } ! else ! { ! throw new ParserException("FilteredQuery \"Filter\" element missing child query element "); ! } ! ! ! Element queryElement=DOMUtils.getChildByTagName(e,"Query"); ! if(queryElement==null) ! { ! throw new ParserException("FilteredQuery missing \"Query\" child element"); ! } ! queryElement=DOMUtils.getFirstChildElement(queryElement); ! Query q=null; ! if(queryElement!=null) ! { ! q=queryFactory.getQuery(queryElement); ! } ! else ! { ! throw new ParserException("FilteredQuery \"Query\" element missing child query element "); ! } ! ! FilteredQuery fq = new FilteredQuery(q,f); ! fq.setBoost(DOMUtils.getAttribute(e,"boost",1.0f)); ! return fq; } --- 27,45 ---- /* (non-Javadoc) * @see org.apache.lucene.xmlparser.QueryObjectBuilder#process(org.w3c.dom.Element) */ ! public Query getQuery(Element e) throws ParserException { ! ! Element filterElement=DOMUtils.getChildByTagOrFail(e,"Filter"); ! filterElement=DOMUtils.getFirstChildOrFail(filterElement); ! Filter f=filterFactory.process(filterElement); ! Element queryElement=DOMUtils.getChildByTagOrFail(e,"Query"); ! queryElement=DOMUtils.getFirstChildOrFail(queryElement); ! Query q=queryFactory.getQuery(queryElement); ! ! FilteredQuery fq = new FilteredQuery(q,f); ! fq.setBoost(DOMUtils.getAttribute(e,"boost",1.0f)); ! return fq; } diff -cr orig/src/org/apache/lucene/xmlparser/builders/BooleanQueryBuilder.java modified/src/org/apache/lucene/xmlparser/builders/BooleanQueryBuilder.java *** orig/src/org/apache/lucene/xmlparser/builders/BooleanQueryBuilder.java Wed Feb 22 22:50:08 2006 --- modified/src/org/apache/lucene/xmlparser/builders/BooleanQueryBuilder.java Sun Feb 26 16:59:12 2006 *************** *** 38,54 **** BooleanClause.Occur occurs=getOccursValue(clauseElem); //find the first element child which should contain a Query ! Element clauseQuery=DOMUtils.getFirstChildElement(clauseElem); ! if(clauseQuery!=null) ! { ! Query q=factory.getQuery(clauseQuery); ! bq.add(new BooleanClause(q,occurs)); ! ! } ! else ! { ! throw new ParserException("BooleanClause missing child query element "); ! } } return bq; --- 38,46 ---- BooleanClause.Occur occurs=getOccursValue(clauseElem); //find the first element child which should contain a Query ! Element clauseQuery=DOMUtils.getFirstChildOrFail(clauseElem); ! Query q=factory.getQuery(clauseQuery); ! bq.add(new BooleanClause(q,occurs)); } return bq; diff -cr orig/src/org/apache/lucene/xmlparser/builders/BoostingQueryBuilder.java modified/src/org/apache/lucene/xmlparser/builders/BoostingQueryBuilder.java *** orig/src/org/apache/lucene/xmlparser/builders/BoostingQueryBuilder.java Wed Feb 22 22:50:08 2006 --- modified/src/org/apache/lucene/xmlparser/builders/BoostingQueryBuilder.java Sun Feb 26 16:58:58 2006 *************** *** 22,51 **** public Query getQuery(Element e) throws ParserException { ! Element mainQueryElem=DOMUtils.getChildByTagName(e,"Query"); ! if(mainQueryElem==null) ! { ! throw new ParserException("BoostingQuery missing a \"Query\" child element"); ! } ! mainQueryElem=DOMUtils.getFirstChildElement(mainQueryElem); ! if(mainQueryElem==null) ! { ! throw new ParserException("BoostingQuery \"Query\" element missing a child element"); ! } Query mainQuery=factory.getQuery(mainQueryElem); ! ! Element boostQueryElem=DOMUtils.getChildByTagName(e,"BoostQuery"); float boost=DOMUtils.getAttribute(boostQueryElem,"boost",defaultBoost); ! if(boostQueryElem==null) ! { ! throw new ParserException("BoostingQuery missing a \"BoostQuery\" child element"); ! } ! boostQueryElem=DOMUtils.getFirstChildElement(boostQueryElem); ! if(boostQueryElem==null) ! { ! throw new ParserException("BoostingQuery \"BoostQuery\" element missing a child element"); ! } Query boostQuery=factory.getQuery(boostQueryElem); BoostingQuery bq = new BoostingQuery(mainQuery,boostQuery,boost); --- 22,34 ---- public Query getQuery(Element e) throws ParserException { ! Element mainQueryElem=DOMUtils.getChildByTagOrFail(e,"Query"); ! mainQueryElem=DOMUtils.getFirstChildOrFail(mainQueryElem); Query mainQuery=factory.getQuery(mainQueryElem); ! Element boostQueryElem=DOMUtils.getChildByTagOrFail(e,"BoostQuery"); float boost=DOMUtils.getAttribute(boostQueryElem,"boost",defaultBoost); ! boostQueryElem=DOMUtils.getFirstChildOrFail(boostQueryElem); Query boostQuery=factory.getQuery(boostQueryElem); BoostingQuery bq = new BoostingQuery(mainQuery,boostQuery,boost); diff -cr orig/src/org/apache/lucene/xmlparser/builders/ConstantScoreQueryBuilder.java modified/src/org/apache/lucene/xmlparser/builders/ConstantScoreQueryBuilder.java *** orig/src/org/apache/lucene/xmlparser/builders/ConstantScoreQueryBuilder.java Wed Feb 22 22:59:38 2006 --- modified/src/org/apache/lucene/xmlparser/builders/ConstantScoreQueryBuilder.java Sun Feb 26 17:06:16 2006 *************** *** 19,29 **** public Query getQuery(Element e) throws ParserException { ! Element filterElem=DOMUtils.getFirstChildElement(e); ! if(filterElem==null) ! { ! throw new ParserException("ConstantScoreQuery missing child element with filter"); ! } Query q=new ConstantScoreQuery(filterFactory.process(filterElem)); q.setBoost(DOMUtils.getAttribute(e,"boost",1.0f)); return q; --- 19,25 ---- public Query getQuery(Element e) throws ParserException { ! Element filterElem=DOMUtils.getFirstChildOrFail(e); Query q=new ConstantScoreQuery(filterFactory.process(filterElem)); q.setBoost(DOMUtils.getAttribute(e,"boost",1.0f)); return q; diff -cr orig/src/org/apache/lucene/xmlparser/builders/SpanNearBuilder.java modified/src/org/apache/lucene/xmlparser/builders/SpanNearBuilder.java *** orig/src/org/apache/lucene/xmlparser/builders/SpanNearBuilder.java Wed Feb 22 22:50:08 2006 --- modified/src/org/apache/lucene/xmlparser/builders/SpanNearBuilder.java Sun Feb 26 17:17:08 2006 *************** *** 19,29 **** public SpanQuery getSpanQuery(Element e) throws ParserException { ! String slopString=e.getAttribute("slop"); ! if((slopString==null)||(slopString.length()==0)) ! { ! throw new ParserException("SpanTermQuery missing slop property "); ! } int slop=Integer.parseInt(slopString); boolean inOrder=DOMUtils.getAttribute(e,"inOrder",false); ArrayList spans=new ArrayList(); --- 19,25 ---- public SpanQuery getSpanQuery(Element e) throws ParserException { ! String slopString=DOMUtils.getAttributeOrFail(e,"slop"); int slop=Integer.parseInt(slopString); boolean inOrder=DOMUtils.getAttribute(e,"inOrder",false); ArrayList spans=new ArrayList(); diff -cr orig/src/org/apache/lucene/xmlparser/builders/SpanNotBuilder.java modified/src/org/apache/lucene/xmlparser/builders/SpanNotBuilder.java *** orig/src/org/apache/lucene/xmlparser/builders/SpanNotBuilder.java Wed Feb 22 22:50:08 2006 --- modified/src/org/apache/lucene/xmlparser/builders/SpanNotBuilder.java Sun Feb 26 17:10:30 2006 *************** *** 24,44 **** Element includeElem=DOMUtils.getChildByTagName(e,"Include"); if(includeElem!=null) { ! includeElem=DOMUtils.getFirstChildElement(includeElem); } - if(includeElem==null) - { - throw new ParserException("SpanNotQuery missing Include child Element"); - } Element excludeElem=DOMUtils.getChildByTagName(e,"Exclude"); if(excludeElem!=null) { ! excludeElem=DOMUtils.getFirstChildElement(excludeElem); } - if(excludeElem==null) - { - throw new ParserException("SpanNotQuery missing Exclude child Element"); - } SpanQuery include=factory.getSpanQuery(includeElem); SpanQuery exclude=factory.getSpanQuery(excludeElem); --- 24,36 ---- Element includeElem=DOMUtils.getChildByTagName(e,"Include"); if(includeElem!=null) { ! includeElem=DOMUtils.getFirstChildOrFail(includeElem); } Element excludeElem=DOMUtils.getChildByTagName(e,"Exclude"); if(excludeElem!=null) { ! excludeElem=DOMUtils.getFirstChildOrFail(excludeElem); } SpanQuery include=factory.getSpanQuery(includeElem); SpanQuery exclude=factory.getSpanQuery(excludeElem); diff -cr orig/src/org/apache/lucene/xmlparser/builders/SpanOrTermsBuilder.java modified/src/org/apache/lucene/xmlparser/builders/SpanOrTermsBuilder.java *** orig/src/org/apache/lucene/xmlparser/builders/SpanOrTermsBuilder.java Wed Feb 22 22:50:08 2006 --- modified/src/org/apache/lucene/xmlparser/builders/SpanOrTermsBuilder.java Sun Feb 26 17:11:42 2006 *************** *** 30,42 **** } public SpanQuery getSpanQuery(Element e) throws ParserException { ! String fieldName=DOMUtils.getAttributeWithInheritance(e,"fieldName"); ! if(fieldName==null) ! { ! throw new ParserException("Error: SpanOrTermsBuilder missing \"fieldName\" property"); ! } ! ! String value=DOMUtils.getText(e); try { --- 30,37 ---- } public SpanQuery getSpanQuery(Element e) throws ParserException { ! String fieldName=DOMUtils.getAttributeWithInheritanceOrFail(e,"fieldName"); ! String value=DOMUtils.getNonBlankTextOrFail(e); try { diff -cr orig/src/org/apache/lucene/xmlparser/builders/SpanTermBuilder.java modified/src/org/apache/lucene/xmlparser/builders/SpanTermBuilder.java *** orig/src/org/apache/lucene/xmlparser/builders/SpanTermBuilder.java Wed Feb 22 22:50:08 2006 --- modified/src/org/apache/lucene/xmlparser/builders/SpanTermBuilder.java Sun Feb 26 17:09:39 2006 *************** *** 12,27 **** public SpanQuery getSpanQuery(Element e) throws ParserException { ! String fieldName=DOMUtils.getAttributeWithInheritance(e,"fieldName"); ! String value=DOMUtils.getText(e); ! if((fieldName==null)||(fieldName.length()==0)) ! { ! throw new ParserException("SpanTermQuery missing fieldName property "); ! } ! if((value==null)||(value.length()==0)) ! { ! throw new ParserException("TermQuery missing value property "); ! } SpanTermQuery stq = new SpanTermQuery(new Term(fieldName,value)); stq.setBoost(DOMUtils.getAttribute(e,"boost",1.0f)); --- 12,19 ---- public SpanQuery getSpanQuery(Element e) throws ParserException { ! String fieldName=DOMUtils.getAttributeWithInheritanceOrFail(e,"fieldName"); ! String value=DOMUtils.getNonBlankTextOrFail(e); SpanTermQuery stq = new SpanTermQuery(new Term(fieldName,value)); stq.setBoost(DOMUtils.getAttribute(e,"boost",1.0f)); diff -cr orig/src/org/apache/lucene/xmlparser/builders/TermQueryObjectBuilder.java modified/src/org/apache/lucene/xmlparser/builders/TermQueryObjectBuilder.java *** orig/src/org/apache/lucene/xmlparser/builders/TermQueryObjectBuilder.java Wed Feb 22 22:50:08 2006 --- modified/src/org/apache/lucene/xmlparser/builders/TermQueryObjectBuilder.java Sun Feb 26 17:04:36 2006 *************** *** 18,35 **** public class TermQueryObjectBuilder implements QueryBuilder { public Query getQuery(Element e) throws ParserException { ! String field=DOMUtils.getAttributeWithInheritance(e,"fieldName"); ! String value=e.getAttribute("value"); ! if((field==null)||(field.length()==0)) ! { ! throw new ParserException("TermQuery missing fieldName property "); ! } ! if((value==null)||(value.length()==0)) ! { ! throw new ParserException("TermQuery missing value property "); ! } ! TermQuery tq = new TermQuery(new Term(field,value)); ! tq.setBoost(DOMUtils.getAttribute(e,"boost",1.0f)); return tq; } --- 18,26 ---- public class TermQueryObjectBuilder implements QueryBuilder { public Query getQuery(Element e) throws ParserException { ! String field=DOMUtils.getAttributeWithInheritanceOrFail(e,"fieldName"); ! String value=DOMUtils.getAttributeOrFail(e,"value"); ! TermQuery tq = new TermQuery(new Term(field,value)); tq.setBoost(DOMUtils.getAttribute(e,"boost",1.0f)); return tq; } diff -cr orig/src/org/apache/lucene/xmlparser/builders/TermsFilterBuilder.java modified/src/org/apache/lucene/xmlparser/builders/TermsFilterBuilder.java *** orig/src/org/apache/lucene/xmlparser/builders/TermsFilterBuilder.java Wed Feb 22 22:50:08 2006 --- modified/src/org/apache/lucene/xmlparser/builders/TermsFilterBuilder.java Sun Feb 26 17:13:49 2006 *************** *** 43,55 **** for(int i=0;i<nl.getLength();i++) { Element fieldElem=(Element) nl.item(i); ! String fieldName=DOMUtils.getAttributeWithInheritance(fieldElem,"fieldName"); ! ! if(fieldName==null) ! { ! throw new ParserException("TermsFilter missing \"fieldName\" element"); ! } ! String text=DOMUtils.getText(fieldElem).trim(); TokenStream ts = analyzer.tokenStream(fieldName, new StringReader(text)); try { --- 43,50 ---- for(int i=0;i<nl.getLength();i++) { Element fieldElem=(Element) nl.item(i); ! String fieldName=DOMUtils.getAttributeWithInheritanceOrFail(fieldElem,"fieldName"); ! String text=DOMUtils.getNonBlankTextOrFail(fieldElem); TokenStream ts = analyzer.tokenStream(fieldName, new StringReader(text)); try {
--------------------------------------------------------------------- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]