Author: ssmiweve
Date: 2008-10-28 16:33:37 +0100 (Tue, 28 Oct 2008)
New Revision: 6895
Added:
trunk/generic.sesam/result-handler-config/src/main/java/no/sesat/search/result/handler/DiscardDuplicatesResultHandlerConfig.java
trunk/generic.sesam/result-handler-control/src/main/java/no/sesat/search/result/handler/DiscardDuplicatesResultHandler.java
Log:
Promote DiscardDuplicatesResultHandler into sesat. Useful and generic code,
written by Lars Johansson.
Added:
trunk/generic.sesam/result-handler-config/src/main/java/no/sesat/search/result/handler/DiscardDuplicatesResultHandlerConfig.java
===================================================================
--- trunk/generic.sesam/result-handler-config/src/main/java/no/sesat/search/result/handler/DiscardDuplicatesResultHandlerConfig.java	(rev 0)
+++ trunk/generic.sesam/result-handler-config/src/main/java/no/sesat/search/result/handler/DiscardDuplicatesResultHandlerConfig.java	2008-10-28 15:33:37 UTC (rev 6895)
@@ -0,0 +1,81 @@
+/* Copyright (2006-2008) Schibsted Søk AS
+ * This file is part of SESAT.
+ *
+ * SESAT is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * SESAT is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with SESAT. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+package no.sesat.search.result.handler;
+
+import no.sesat.search.result.handler.AbstractResultHandlerConfig.Controller;
+import no.sesat.search.site.config.AbstractDocumentFactory;
+import no.sesat.search.site.config.AbstractDocumentFactory.ParseType;
+import org.w3c.dom.Element;
+
+/** Removes from a ResultList duplicate ResultItems.
+ * Duplicates are identified via the values of a specified field.
+ * The check may be made case-insensitive.
+ *
+ * @author <a href="mailto:[EMAIL PROTECTED]">Lars Johansson</a>
+ * @version <tt>$Id$</tt>
+ */
+@Controller("DiscardDuplicatesResultHandler")
+public final class DiscardDuplicatesResultHandlerConfig extends AbstractResultHandlerConfig {
+
+    private String field;
+
+    private boolean ignoreCase;
+
+    /** The field to check uniqueness against.
+     *
+     * @param string name of the field whose values identify duplicates
+     */
+    public void setField(final String string) {
+        field = string;
+    }
+
+    /** @see #setField(java.lang.String)
+     *
+     * @return field name
+     */
+    public String getField() {
+        return field;
+    }
+
+    /** @see #setIgnoreCase(boolean)
+     *
+     * @return true if the uniqueness check is case-insensitive
+     */
+    public boolean isIgnoreCase() {
+        return ignoreCase;
+    }
+
+    /** Is the uniqueness case-insensitive.
+     * Default is false.
+     * @param ignoreCase true if case insensitive
+     */
+    public void setIgnoreCase(final boolean ignoreCase) {
+        this.ignoreCase = ignoreCase;
+    }
+
+    @Override
+    public AbstractResultHandlerConfig readResultHandler(final Element element) {
+        super.readResultHandler(element);
+
+        // fill "field" and "ignoreCase" from the element; ignoreCase is given "false" as its default
+        AbstractDocumentFactory.fillBeanProperty(this, null, "field", ParseType.String, element, null);
+        AbstractDocumentFactory.fillBeanProperty(this, null, "ignoreCase", ParseType.Boolean, element, "false");
+
+        return this;
+    }
+}
\ No newline at end of file
Property changes on:
trunk/generic.sesam/result-handler-config/src/main/java/no/sesat/search/result/handler/DiscardDuplicatesResultHandlerConfig.java
___________________________________________________________________
Name: svn:keywords
+ Id
Added:
trunk/generic.sesam/result-handler-control/src/main/java/no/sesat/search/result/handler/DiscardDuplicatesResultHandler.java
===================================================================
--- trunk/generic.sesam/result-handler-control/src/main/java/no/sesat/search/result/handler/DiscardDuplicatesResultHandler.java	(rev 0)
+++ trunk/generic.sesam/result-handler-control/src/main/java/no/sesat/search/result/handler/DiscardDuplicatesResultHandler.java	2008-10-28 15:33:37 UTC (rev 6895)
@@ -0,0 +1,68 @@
+/* Copyright (2006-2008) Schibsted Søk AS
+ * This file is part of SESAT.
+ *
+ * SESAT is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * SESAT is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with SESAT. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+package no.sesat.search.result.handler;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.Locale;
+import no.sesat.search.datamodel.DataModel;
+import no.sesat.search.result.ResultItem;
+
+/** Discards result items whose configured field value duplicates that of an earlier item.
+ *
+ * @see DiscardDuplicatesResultHandlerConfig
+ *
+ * @author <a href="mailto:[EMAIL PROTECTED]">Lars Johansson</a>
+ * @version <tt>$Id$</tt>
+ */
+public final class DiscardDuplicatesResultHandler implements ResultHandler {
+
+    /** Field values already seen. Held on the instance, so it spans every handleResult call made on it. */
+    private final Collection<String> keys = new HashSet<String>();
+
+    private final DiscardDuplicatesResultHandlerConfig config;
+
+    /**
+     * @param config must be a DiscardDuplicatesResultHandlerConfig; any other type fails the cast
+     */
+    public DiscardDuplicatesResultHandler(final ResultHandlerConfig config) {
+        this.config = (DiscardDuplicatesResultHandlerConfig) config;
+    }
+
+    /** Removes from the context's search result every item whose field value has already been seen.
+     * Items without a value for the field are never treated as duplicates.
+     *
+     * @param cxt supplies the search result to filter
+     * @param datamodel not used by this handler
+     */
+    public void handleResult(final Context cxt, final DataModel datamodel) {
+
+        final Collection<ResultItem> toDelete = new ArrayList<ResultItem>();
+
+        // scan for duplicates
+        for (ResultItem searchResultItem : cxt.getSearchResult().getResults()) {
+
+            // A missing value must become "" and not the text "null" that (null + "") produces;
+            // otherwise every item lacking the field after the first is discarded as a duplicate.
+            final Object rawValue = searchResultItem.getField(config.getField());
+            String uniqueField = null == rawValue ? "" : String.valueOf(rawValue);
+
+            if (config.isIgnoreCase()) {
+                // fixed locale so the keys do not vary with the JVM's default locale
+                uniqueField = uniqueField.toLowerCase(Locale.ENGLISH);
+            }
+
+            // remove entries with same name (not empty ones); add() returns false for a key already seen
+            if (uniqueField.length() > 0 && !keys.add(uniqueField)) {
+                toDelete.add(searchResultItem);
+            }
+        }
+
+        // now delete; removal is deferred to this second pass instead of happening inside the scan loop
+        for (ResultItem item : toDelete) {
+            cxt.getSearchResult().removeResult(item);
+        }
+    }
+}
\ No newline at end of file
Property changes on:
trunk/generic.sesam/result-handler-control/src/main/java/no/sesat/search/result/handler/DiscardDuplicatesResultHandler.java
___________________________________________________________________
Name: svn:keywords
+ Id
_______________________________________________
Kernel-commits mailing list
[email protected]
http://sesat.no/mailman/listinfo/kernel-commits