Author: magnuse
Date: 2006-04-06 13:45:30 +0200 (Thu, 06 Apr 2006)
New Revision: 2730

Added:
   
trunk/src/java/no/schibstedsok/front/searchportal/query/transform/SynonymQueryTransformer.java
   
trunk/src/test/java/no/schibstedsok/front/searchportal/query/transform/SynonymQueryTransformerTest.java
Log:
First version.


Added: 
trunk/src/java/no/schibstedsok/front/searchportal/query/transform/SynonymQueryTransformer.java
===================================================================
--- 
trunk/src/java/no/schibstedsok/front/searchportal/query/transform/SynonymQueryTransformer.java
                              (rev 0)
+++ 
trunk/src/java/no/schibstedsok/front/searchportal/query/transform/SynonymQueryTransformer.java
      2006-04-06 11:45:30 UTC (rev 2730)
@@ -0,0 +1,142 @@
+/*
+ * SynonymQueryTransformer.java
+ *
+ * Created on April 5, 2006, 8:05 PM
+ *
+ */
+
+package no.schibstedsok.front.searchportal.query.transform;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import no.schibstedsok.front.searchportal.query.DefaultOperatorClause;
+import no.schibstedsok.front.searchportal.query.LeafClause;
+import no.schibstedsok.front.searchportal.query.token.TokenPredicate;
+
+/**
+ * Query transformer that rewrites terms having a known synonym into a
+ * parenthesised group holding both the original term(s) and the synonym,
+ * for example <code>sch</code> becomes <code>(sch schibsted)</code>.
+ *
+ * Expansion is only attempted where a clause carries at least one of the
+ * token predicates registered through {@link #addPredicateName(String)}.
+ * The synonym table is currently hard-coded in getSynonym(String).
+ *
+ * The run of leafs being collected is kept in instance fields and is only
+ * reset when a run is flushed.
+ * NOTE(review): presumably one instance is used per query - confirm.
+ *
+ * @author maek
+ */
+public final class SynonymQueryTransformer extends AbstractQueryTransformer {
+
+    /** Synonym expansion is only performed for clauses matching the
+     * predicates named in predicateNames. */
+    private final Collection<String> predicateNames = new ArrayList<String>();
+    /** Predicates resolved from predicateNames; built lazily by getPredicates(). */
+    private Collection<TokenPredicate> predicates = null;
+
+    /** Consecutive leafs whose joined terms currently form a synonym key. */
+    private final List<LeafClause> leafs = new ArrayList<LeafClause>();
+    /** Registered predicates found on the default-operator clauses visited so far. */
+    private final Set<TokenPredicate> matchingPredicates = new HashSet<TokenPredicate>();
+    /** Leafs that have already been rewritten. */
+    private final List<LeafClause> expanded = new ArrayList<LeafClause>();
+
+    /** Space separated terms of the leafs currently held in {@link #leafs}. */
+    private final StringBuilder builder = new StringBuilder();
+
+    /**
+     * Registers the name of a token predicate that enables synonym expansion.
+     * The names are resolved once, on first use, so they must be added before
+     * the transformer visits a query.
+     *
+     * @param name name of a TokenPredicate constant
+     */
+    public void addPredicateName(final String name) {
+        predicateNames.add(name);
+    }
+
+    /**
+     * Records which of the registered predicates the clause matches (known or
+     * possible) and then visits both of its children.
+     *
+     * @param clause the default-operator clause being visited
+     */
+    protected void visitImpl(final DefaultOperatorClause clause) {
+        for (final TokenPredicate p : getPredicates()) {
+            if (clause.getKnownPredicates().contains(p)
+                    || clause.getPossiblePredicates().contains(p)) {
+                matchingPredicates.add(p);
+            }
+        }
+
+        clause.getFirstClause().accept(this);
+        clause.getSecondClause().accept(this);
+    }
+
+    /**
+     * Collects consecutive leafs whose joined terms form a synonym key and
+     * expands the collected run as soon as a leaf no longer extends it. The
+     * very first leaf of the query is additionally expanded on its own when
+     * it carries one of the registered predicates.
+     *
+     * @param clause the leaf clause being visited
+     */
+    protected void visitImpl(final LeafClause clause) {
+        // The accumulation runs once per leaf. It used to run once per matching
+        // predicate, which appended the term repeatedly and cleared the set
+        // while it was being iterated.
+        if (!matchingPredicates.isEmpty() && !expanded.contains(clause)) {
+
+            // Separate the terms of a multi-word candidate with a single space.
+            if (builder.length() > 0) {
+                builder.append(' ');
+            }
+
+            if (isSynonym(builder.toString() + clause.getTerm())) {
+                builder.append(clause.getTerm());
+                leafs.add(clause);
+            } else if (!leafs.isEmpty()) {
+                // The run ended with the previous leaf: expand it and start over.
+                expandSynonym(leafs, getSynonym(builder.toString()));
+                expanded.addAll(leafs);
+                leafs.clear();
+                matchingPredicates.clear();
+                builder.setLength(0);
+            }
+        }
+
+        if (clause == getContext().getQuery().getFirstLeafClause()) {
+            for (final TokenPredicate predicate : getPredicates()) {
+                if (clause.getPossiblePredicates().contains(predicate)
+                        || clause.getKnownPredicates().contains(predicate)) {
+                    final String term = getContext().getTransformedTerms().get(clause);
+                    if (isSynonym(term)) {
+                        expandSynonym(clause, getSynonym(term));
+                        expanded.add(clause);
+                        return;
+                    }
+                }
+            }
+        }
+    }
+
+    /**
+     * Wraps a run of leafs and its synonym in one parenthesised group. The
+     * opening parenthesis goes in front of the first leaf's term, the synonym
+     * and the closing parenthesis go after the last leaf's term.
+     *
+     * @param replace the non-empty run of leafs to expand
+     * @param synonym the synonym to add to the group
+     */
+    private void expandSynonym(final List<LeafClause> replace, String synonym) {
+        final LeafClause first = replace.get(0);
+        // Must be the final element; reading index 0 here left every leaf after
+        // the first one outside of the group.
+        final LeafClause last = replace.get(replace.size() - 1);
+
+        if (first != last) {
+            getContext().getTransformedTerms().put(first, "(" + first.getTerm());
+            getContext().getTransformedTerms().put(last, last.getTerm() + " " + synonym + ")");
+        } else {
+            getContext().getTransformedTerms().put(last, "(" + last.getTerm() + " " + synonym + ")");
+        }
+    }
+
+    /**
+     * Replaces the transformed term of a single leaf with a parenthesised
+     * group holding that term followed by the synonym.
+     *
+     * @param replace the leaf to expand
+     * @param synonym the synonym to add to the group
+     */
+    private void expandSynonym(final LeafClause replace, String synonym) {
+        final String originalTerm = getContext().getTransformedTerms().get(replace);
+        getContext().getTransformedTerms().put(replace, "(" + originalTerm + " " + synonym + ")");
+    }
+
+    /**
+     * Resolves the registered predicate names on the first call and returns
+     * the same collection on every later call.
+     *
+     * @return the predicates that enable synonym expansion
+     */
+    private Collection<TokenPredicate> getPredicates() {
+        synchronized (this) {
+            if (predicates == null) {
+                predicates = new ArrayList<TokenPredicate>();
+                for (final String predicateName : predicateNames) {
+                    predicates.add(TokenPredicate.valueOf(predicateName));
+                }
+            }
+        }
+        return predicates;
+    }
+
+    /**
+     * Tells whether a synonym is known for the given text.
+     *
+     * @param string the text to look up; surrounding whitespace and case are ignored
+     * @return true if getSynonym(String) knows a synonym for the text
+     */
+    private boolean isSynonym(String string) {
+        return getSynonym(string) != null;
+    }
+
+    /**
+     * Looks up the synonym of the given text in the hard-coded table.
+     *
+     * @param string the text to look up; surrounding whitespace and case are ignored
+     * @return the synonym, or null if none is known
+     */
+    private String getSynonym(String string) {
+        final String key = string.trim();
+
+        if (key.equalsIgnoreCase("sch")) {
+            return "schibsted";
+        }
+
+        if (key.equalsIgnoreCase("schibsted")) {
+            return "sch";
+        }
+
+        if (key.equalsIgnoreCase("schibsted asa")) {
+            return "schasa";
+        }
+
+        return null;
+    }
+}

Added: 
trunk/src/test/java/no/schibstedsok/front/searchportal/query/transform/SynonymQueryTransformerTest.java
===================================================================
--- 
trunk/src/test/java/no/schibstedsok/front/searchportal/query/transform/SynonymQueryTransformerTest.java
                             (rev 0)
+++ 
trunk/src/test/java/no/schibstedsok/front/searchportal/query/transform/SynonymQueryTransformerTest.java
     2006-04-06 11:45:30 UTC (rev 2730)
@@ -0,0 +1,273 @@
+/*
+ * SynonymQueryTransformerTest.java
+ *
+ * Created on April 5, 2006, 9:32 PM
+ *
+ * To change this template, choose Tools | Template Manager
+ * and open the template in the editor.
+ */
+
+package no.schibstedsok.front.searchportal.query.transform;
+
+import com.thoughtworks.xstream.XStream;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+import javax.xml.parsers.DocumentBuilder;
+import junit.framework.TestCase;
+import 
no.schibstedsok.front.searchportal.configuration.FileResourcesSearchTabsCreatorTest;
+import no.schibstedsok.front.searchportal.configuration.loader.DocumentLoader;
+import 
no.schibstedsok.front.searchportal.configuration.loader.FileResourceLoader;
+import 
no.schibstedsok.front.searchportal.configuration.loader.PropertiesLoader;
+import no.schibstedsok.front.searchportal.configuration.loader.XStreamLoader;
+import no.schibstedsok.front.searchportal.query.AndClause;
+import no.schibstedsok.front.searchportal.query.AndNotClause;
+import no.schibstedsok.front.searchportal.query.Clause;
+import no.schibstedsok.front.searchportal.query.DefaultOperatorClause;
+import no.schibstedsok.front.searchportal.query.LeafClause;
+import no.schibstedsok.front.searchportal.query.NotClause;
+import no.schibstedsok.front.searchportal.query.OperationClause;
+import no.schibstedsok.front.searchportal.query.OrClause;
+import no.schibstedsok.front.searchportal.query.Query;
+import no.schibstedsok.front.searchportal.query.Visitor;
+import no.schibstedsok.front.searchportal.query.XorClause;
+import 
no.schibstedsok.front.searchportal.query.parser.AbstractQueryParserContext;
+import 
no.schibstedsok.front.searchportal.query.parser.AbstractReflectionVisitor;
+import no.schibstedsok.front.searchportal.query.parser.ParseException;
+import no.schibstedsok.front.searchportal.query.parser.QueryParser;
+import no.schibstedsok.front.searchportal.query.parser.QueryParserImpl;
+import no.schibstedsok.front.searchportal.query.token.TokenEvaluatorFactory;
+import 
no.schibstedsok.front.searchportal.query.token.TokenEvaluatorFactoryImpl;
+import no.schibstedsok.front.searchportal.site.Site;
+import org.apache.log4j.Logger;
+
+/**
+ * Tests for SynonymQueryTransformer. Each test parses a query string, applies
+ * the transformer with a single predicate name registered and compares the
+ * query string rebuilt from the transformed terms with the expected result.
+ *
+ * @author maek
+ */
+public class SynonymQueryTransformerTest extends TestCase {
+
+    private static final Logger LOG =
+            Logger.getLogger(SynonymQueryTransformerTest.class);
+
+    public SynonymQueryTransformerTest(final String testName) {
+        super(testName);
+    }
+
+    /** A single term is expanded when the EXACT_STOCKMARKETTICKERS predicate is registered. */
+    public void testOneWordExact() throws ParseException {
+        final Query query = parseQuery("sch");
+        final Map trans = applyTransformer(new SynonymQueryTransformer(), query, "EXACT_STOCKMARKETTICKERS");
+        final QueryBuilder builder = new QueryBuilder(query, trans);
+
+        assertEquals("(sch schibsted)", builder.getQueryString());
+    }
+
+    /** A single term is expanded when the STOCKMARKETTICKERS predicate is registered. */
+    public void testOneWord() throws ParseException {
+        final Query query = parseQuery("sch");
+        final Map trans = applyTransformer(new SynonymQueryTransformer(), query, "STOCKMARKETTICKERS");
+        final QueryBuilder builder = new QueryBuilder(query, trans);
+
+        assertEquals("(sch schibsted)", builder.getQueryString());
+    }
+
+    /** Only the first synonym term of a longer query is expanded. */
+    public void testTwoWords() throws ParseException {
+        final Query query = parseQuery("oslo sch schibsted");
+        final Map trans = applyTransformer(new SynonymQueryTransformer(), query, "STOCKMARKETTICKERS");
+        final QueryBuilder builder = new QueryBuilder(query, trans);
+
+        assertEquals("oslo (sch schibsted) schibsted", builder.getQueryString());
+    }
+
+    // The two tests below are disabled in this revision. They are kept because
+    // they record the intended behaviour for exact matches and multi-word synonyms.
+//    public void testTwoWordsExact() throws ParseException {
+//        // Not Exact match. Don't do expansion.
+//        final Query query = parseQuery("oslo sch schibsted");
+//        final Map trans = applyTransformer(new SynonymQueryTransformer(), query, "EXACT_STOCKMARKETTICKERS");
+//        final QueryBuilder builder = new QueryBuilder(query, trans);
+//
+//        assertEquals("oslo sch schibsted", builder.getQueryString());
+//    }
+//
+//    public void testMultiWordOriginalWithOtherTermAtEnd() throws ParseException {
+//        final Query query = parseQuery("schibsted asa oslo");
+//        final Map trans = applyTransformer(new SynonymQueryTransformer(), query, "COMPANYRANK");
+//        final QueryBuilder builder = new QueryBuilder(query, trans);
+//
+//        assertEquals("(schibsted asa schasa) oslo", builder.getQueryString());
+//    }
+
+    /**
+     * Runs the transformer over the query and returns the resulting terms.
+     * The map is first filled with every leaf's untransformed term.
+     *
+     * @param t the transformer under test
+     * @param query the parsed query to transform
+     * @param predicateName name of the predicate that enables expansion
+     * @return the transformed term of every leaf clause, keyed by clause
+     */
+    private Map applyTransformer(final SynonymQueryTransformer t, final Query query, final String predicateName) {
+
+        final Map<Clause,String> transformedTerms = new LinkedHashMap<Clause,String>();
+
+        final QueryTransformer.Context qtCxt = new QueryTransformer.Context() {
+
+            public Map<Clause,String> getTransformedTerms() {
+                return transformedTerms;
+            }
+            public Site getSite() {
+                return Site.DEFAULT;
+            }
+            public Query getQuery() {
+                return query;
+            }
+            public String getTransformedQuery() {
+                return query.getQueryString();
+            }
+            public PropertiesLoader newPropertiesLoader(final String resource, final Properties properties) {
+                return FileResourceLoader.newPropertiesLoader(this, resource, properties);
+            }
+
+            public XStreamLoader newXStreamLoader(final String resource, final XStream xstream) {
+                return FileResourceLoader.newXStreamLoader(this, resource, xstream);
+            }
+
+            public DocumentLoader newDocumentLoader(final String resource, final DocumentBuilder builder) {
+                return FileResourceLoader.newDocumentLoader(this, resource, builder);
+            }
+        };
+
+        t.addPredicateName(predicateName);
+        t.setContext(qtCxt);
+
+        final Visitor mapInitialisor = new MapInitialisor(transformedTerms);
+        mapInitialisor.visit(query.getRootClause());
+        t.visit(query.getRootClause());
+        return transformedTerms;
+    }
+
+    /**
+     * Parses the query string with a parser backed by file based resources of
+     * the default site.
+     *
+     * @param queryString the raw query
+     * @return the parsed query
+     * @throws ParseException declared because the tests propagate parser failures
+     */
+    private Query parseQuery(final String queryString) throws ParseException {
+
+        final TokenEvaluatorFactory tokenEvaluatorFactory  = new TokenEvaluatorFactoryImpl(
+                new TokenEvaluatorFactoryImpl.Context() {
+            public String getQueryString() {
+                return queryString;
+            }
+
+            public Properties getApplicationProperties() {
+                return FileResourcesSearchTabsCreatorTest.valueOf(Site.DEFAULT).getProperties();
+            }
+
+            public PropertiesLoader newPropertiesLoader(final String resource, final Properties properties) {
+                return FileResourceLoader.newPropertiesLoader(this, resource, properties);
+            }
+
+            public XStreamLoader newXStreamLoader(final String resource, final XStream xstream) {
+                return FileResourceLoader.newXStreamLoader(this, resource, xstream);
+            }
+
+            public DocumentLoader newDocumentLoader(final String resource, final DocumentBuilder builder) {
+                return FileResourceLoader.newDocumentLoader(this, resource, builder);
+            }
+
+            public Site getSite()  {
+                return Site.DEFAULT;
+            }
+        });
+
+        final QueryParser parser = new QueryParserImpl(new AbstractQueryParserContext() {
+            public TokenEvaluatorFactory getTokenEvaluatorFactory() {
+                return tokenEvaluatorFactory;
+            }
+        });
+
+        return parser.getQuery();
+    }
+
+    /**
+     * Visitor that rebuilds a query string from a query and a map holding the
+     * (possibly transformed) term of every leaf clause.
+     */
+    public static final class QueryBuilder extends AbstractReflectionVisitor {
+        private final Query query;
+        private final Map map;
+        private final StringBuffer sb = new StringBuffer();
+
+        public QueryBuilder(final Query q, final Map m) {
+            query = q;
+            map = m;
+        }
+
+        /**
+         * Rebuilds the query string from scratch on every call.
+         *
+         * @return the query string built from the mapped terms
+         */
+        public synchronized String getQueryString() {
+            sb.setLength(0);
+            visit(query.getRootClause());
+            return sb.toString();
+        }
+
+        public void visitImpl(final LeafClause clause) {
+            sb.append(map.get(clause));
+        }
+        public void visitImpl(final OperationClause clause) {
+            clause.getFirstClause().accept(this);
+        }
+        public void visitImpl(final AndClause clause) {
+            clause.getFirstClause().accept(this);
+            sb.append(" AND ");
+            clause.getSecondClause().accept(this);
+        }
+        public void visitImpl(final OrClause clause) {
+            clause.getFirstClause().accept(this);
+            sb.append(" OR ");
+            clause.getSecondClause().accept(this);
+        }
+        public void visitImpl(final DefaultOperatorClause clause) {
+            clause.getFirstClause().accept(this);
+            sb.append(' ');
+            clause.getSecondClause().accept(this);
+        }
+        public void visitImpl(final NotClause clause) {
+            // Nothing is emitted when the child has no (non-empty) mapped term.
+            final String childsTerm = (String) map.get(clause.getFirstClause());
+            if (childsTerm != null && childsTerm.length() > 0) {
+                sb.append("NOT ");
+                clause.getFirstClause().accept(this);
+            }
+        }
+        public void visitImpl(final AndNotClause clause) {
+            // Nothing is emitted when the child has no (non-empty) mapped term.
+            final String childsTerm = (String) map.get(clause.getFirstClause());
+            if (childsTerm != null && childsTerm.length() > 0) {
+                sb.append("ANDNOT ");
+                clause.getFirstClause().accept(this);
+            }
+        }
+        public void visitImpl(final XorClause clause) {
+            // [TODO] we need to determine which branch in the query-tree we want to use.
+            //  Both branches to a XorClause should never be used.
+            clause.getFirstClause().accept(this);
+            // clause.getSecondClause().accept(this);
+        }
+    }
+
+    /**
+     * Visitor that fills a map with the untransformed term of every leaf
+     * clause it reaches, prefixed with "field: " when the leaf has a field.
+     */
+    private static class MapInitialisor extends AbstractReflectionVisitor {
+
+        private final Map map;
+
+        public MapInitialisor(final Map m) {
+            map = m;
+        }
+
+        public void visitImpl(final LeafClause clause) {
+            final String fullTerm =
+                    (clause.getField() == null ? "" : clause.getField() + ": ")
+                    + clause.getTerm();
+
+            map.put(clause, fullTerm);
+        }
+        public void visitImpl(final OperationClause clause) {
+            clause.getFirstClause().accept(this);
+        }
+        public void visitImpl(final AndClause clause) {
+            clause.getFirstClause().accept(this);
+            clause.getSecondClause().accept(this);
+        }
+        public void visitImpl(final OrClause clause) {
+            clause.getFirstClause().accept(this);
+            clause.getSecondClause().accept(this);
+        }
+        public void visitImpl(final DefaultOperatorClause clause) {
+            clause.getFirstClause().accept(this);
+            clause.getSecondClause().accept(this);
+        }
+    }
+
+    /**
+     * Runs the enabled tests without a JUnit runner. It used to call the
+     * disabled multi-word test, which does not exist as a method, so the
+     * class could not be compiled.
+     */
+    public static void main(String args[]) throws ParseException {
+        SynonymQueryTransformerTest test = new SynonymQueryTransformerTest("test");
+
+        test.testOneWordExact();
+        test.testOneWord();
+        test.testTwoWords();
+    }
+}

_______________________________________________
Kernel-commits mailing list
[email protected]
http://sesat.no/mailman/listinfo/kernel-commits

Reply via email to