Author: ssmiweve
Date: 2008-10-28 16:33:37 +0100 (Tue, 28 Oct 2008)
New Revision: 6895

Added:
   
trunk/generic.sesam/result-handler-config/src/main/java/no/sesat/search/result/handler/DiscardDuplicatesResultHandlerConfig.java
   
trunk/generic.sesam/result-handler-control/src/main/java/no/sesat/search/result/handler/DiscardDuplicatesResultHandler.java
Log:
Promote DiscardDuplicatesResultHandler into sesat. Useful and generic code
written by Lars Johansson.


Added: 
trunk/generic.sesam/result-handler-config/src/main/java/no/sesat/search/result/handler/DiscardDuplicatesResultHandlerConfig.java
===================================================================
--- 
trunk/generic.sesam/result-handler-config/src/main/java/no/sesat/search/result/handler/DiscardDuplicatesResultHandlerConfig.java
                            (rev 0)
+++ 
trunk/generic.sesam/result-handler-config/src/main/java/no/sesat/search/result/handler/DiscardDuplicatesResultHandlerConfig.java
    2008-10-28 15:33:37 UTC (rev 6895)
@@ -0,0 +1,81 @@
+/* Copyright (2006-2008) Schibsted Søk AS
+ * This file is part of SESAT.
+ *
+ *   SESAT is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU Affero General Public License as published by
+ *   the Free Software Foundation, either version 3 of the License, or
+ *   (at your option) any later version.
+ *
+ *   SESAT is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU Affero General Public License for more details.
+ *
+ *   You should have received a copy of the GNU Affero General Public License
+ *   along with SESAT.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+package no.sesat.search.result.handler;
+
+import no.sesat.search.result.handler.AbstractResultHandlerConfig.Controller;
+import no.sesat.search.site.config.AbstractDocumentFactory;
+import no.sesat.search.site.config.AbstractDocumentFactory.ParseType;
+import org.w3c.dom.Element;
+
+/** Configuration for the handler that removes duplicate ResultItems from a ResultList.
+ * Duplicates are identified via the values of a specified field.
+ * The check may be made case-insensitive.
+ *
+ * @author <a href="mailto:[EMAIL PROTECTED]">Lars Johansson</a>
+ * @version <tt>$Id$</tt>
+ */
+@Controller("DiscardDuplicatesResultHandler")
+public final class DiscardDuplicatesResultHandlerConfig extends AbstractResultHandlerConfig {
+
+    /** Name of the result field whose value decides whether two items are duplicates. */
+    private String field;
+
+    /** Whether field values are compared without regard to case. Default is false. */
+    private boolean ignoreCase;
+
+    /** The field to check uniqueness against.
+     *
+     * @param string field name
+     */
+    public void setField(final String string) {
+        field = string;
+    }
+
+    /** @see #setField(java.lang.String)
+     *
+     * @return field name
+     */
+    public String getField() {
+        return field;
+    }
+
+    /** @see #setIgnoreCase(boolean)
+     *
+     * @return true if the uniqueness check is case-insensitive
+     */
+    public boolean isIgnoreCase() {
+        return ignoreCase;
+    }
+
+    /** Is the uniqueness case-insensitive.
+     * Default is false.
+     * @param ignoreCase true if case insensitive
+     */
+    public void setIgnoreCase(final boolean ignoreCase) {
+        this.ignoreCase = ignoreCase;
+    }
+
+    /** Reads this handler's configuration from the given element.
+     * Delegates to the superclass first, then fills the "field" and "ignoreCase" bean
+     * properties through AbstractDocumentFactory.fillBeanProperty.
+     * NOTE(review): the last argument ("false" for ignoreCase, null for field) is presumably
+     * the value used when the element does not specify the property; confirm against
+     * AbstractDocumentFactory.
+     *
+     * @param element the element holding this handler's configuration
+     * @return this instance
+     */
+    @Override
+    public AbstractResultHandlerConfig readResultHandler(final Element element) {
+
+        super.readResultHandler(element);
+
+        AbstractDocumentFactory.fillBeanProperty(this, null, "field", ParseType.String, element, null);
+        AbstractDocumentFactory.fillBeanProperty(this, null, "ignoreCase", ParseType.Boolean, element, "false");
+
+        return this;
+    }
+}
\ No newline at end of file


Property changes on: 
trunk/generic.sesam/result-handler-config/src/main/java/no/sesat/search/result/handler/DiscardDuplicatesResultHandlerConfig.java
___________________________________________________________________
Name: svn:keywords
   + Id

Added: 
trunk/generic.sesam/result-handler-control/src/main/java/no/sesat/search/result/handler/DiscardDuplicatesResultHandler.java
===================================================================
--- 
trunk/generic.sesam/result-handler-control/src/main/java/no/sesat/search/result/handler/DiscardDuplicatesResultHandler.java
                         (rev 0)
+++ 
trunk/generic.sesam/result-handler-control/src/main/java/no/sesat/search/result/handler/DiscardDuplicatesResultHandler.java
 2008-10-28 15:33:37 UTC (rev 6895)
@@ -0,0 +1,68 @@
+/* Copyright (2006-2008) Schibsted Søk AS
+ * This file is part of SESAT.
+ *
+ *   SESAT is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU Affero General Public License as published by
+ *   the Free Software Foundation, either version 3 of the License, or
+ *   (at your option) any later version.
+ *
+ *   SESAT is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU Affero General Public License for more details.
+ *
+ *   You should have received a copy of the GNU Affero General Public License
+ *   along with SESAT.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+package no.sesat.search.result.handler;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.Locale;
+import java.util.Set;
+import no.sesat.search.datamodel.DataModel;
+import no.sesat.search.result.ResultItem;
+
+/** Removes from the search result every item whose configured field value has already been seen.
+ * Items that have no value (null or empty) for the field are never treated as duplicates.
+ *
+ * @see DiscardDuplicatesResultHandlerConfig
+ *
+ * @author <a href="mailto:[EMAIL PROTECTED]">Lars Johansson</a>
+ * @version <tt>$Id$</tt>
+ */
+public final class DiscardDuplicatesResultHandler implements ResultHandler {
+
+    /** Field values already encountered.
+     * Held per handler instance and never cleared, so values seen in one
+     * handleResult call also count as seen in later calls on the same instance.
+     */
+    private final Set<String> keys = new HashSet<String>();
+
+    private final DiscardDuplicatesResultHandlerConfig config;
+
+    /** Creates the handler for the given configuration.
+     *
+     * @param config the configuration; cast to DiscardDuplicatesResultHandlerConfig
+     * @throws ClassCastException if config is of any other type
+     */
+    public DiscardDuplicatesResultHandler(final ResultHandlerConfig config) {
+        this.config = (DiscardDuplicatesResultHandlerConfig) config;
+    }
+
+    /** Scans the results of the context's search result and removes the duplicates.
+     * The first item carrying a given field value is kept; later items with the same
+     * value are removed.
+     *
+     * @param cxt the context providing the search result to filter
+     * @param datamodel not used by this handler
+     */
+    public void handleResult(final Context cxt, final DataModel datamodel) {
+
+        final Collection<ResultItem> toDelete = new ArrayList<ResultItem>();
+
+        // scan for duplicates
+        for (ResultItem searchResultItem : cxt.getSearchResult().getResults()) {
+
+            final String uniqueField = uniqueKeyOf(searchResultItem);
+
+            // items without a value are always kept;
+            // Set.add returns false when the value has been seen before
+            if (uniqueField.length() > 0 && !keys.add(uniqueField)) {
+                toDelete.add(searchResultItem);
+            }
+        }
+
+        // now delete, only after the scan, so results are not removed while being iterated
+        for (ResultItem item : toDelete) {
+            cxt.getSearchResult().removeResult(item);
+        }
+    }
+
+    /** Returns the value used to compare the item for uniqueness, or "" when the item has no value. */
+    private String uniqueKeyOf(final ResultItem item) {
+
+        final Object value = item.getField(config.getField());
+
+        // a plain (value + "") would turn a missing value into the literal "null",
+        // making every item that lacks the field a duplicate of the first such item
+        if (value == null) {
+            return "";
+        }
+
+        final String key = value.toString();
+
+        // fixed locale so the case folding does not vary with the JVM default locale
+        return config.isIgnoreCase() ? key.toLowerCase(Locale.ENGLISH) : key;
+    }
+}
\ No newline at end of file


Property changes on: 
trunk/generic.sesam/result-handler-control/src/main/java/no/sesat/search/result/handler/DiscardDuplicatesResultHandler.java
___________________________________________________________________
Name: svn:keywords
   + Id

_______________________________________________
Kernel-commits mailing list
[email protected]
http://sesat.no/mailman/listinfo/kernel-commits

Reply via email to