Author: ssmiweve Date: 2007-06-03 21:14:25 +0200 (Sun, 03 Jun 2007) New Revision: 5256
Modified: branches/2.13/core-api/src/main/java/no/schibstedsok/searchportal/mode/command/AbstractSimpleFastSearchCommand.java branches/2.13/result-spi/src/main/java/no/schibstedsok/searchportal/result/BasicSearchResultItem.java branches/2.13/result-spi/src/main/java/no/schibstedsok/searchportal/result/ResultItem.java branches/2.13/result-spi/src/main/java/no/schibstedsok/searchportal/result/StringChopper.java Log: SEARCH-2621 - Rar sortering på "Jenny hansen" Modified: branches/2.13/core-api/src/main/java/no/schibstedsok/searchportal/mode/command/AbstractSimpleFastSearchCommand.java =================================================================== --- branches/2.13/core-api/src/main/java/no/schibstedsok/searchportal/mode/command/AbstractSimpleFastSearchCommand.java 2007-06-03 18:16:02 UTC (rev 5255) +++ branches/2.13/core-api/src/main/java/no/schibstedsok/searchportal/mode/command/AbstractSimpleFastSearchCommand.java 2007-06-03 19:14:25 UTC (rev 5256) @@ -72,7 +72,7 @@ * @author <a href="mailto:[EMAIL PROTECTED]">Michael Semb Wever</a> * @version $Id$ */ -public abstract class AbstractSimpleFastSearchCommand extends AbstractSearchCommand { + public abstract class AbstractSimpleFastSearchCommand extends AbstractSearchCommand { // Constants ----------------------------------------------------- private static final Logger LOG = Logger.getLogger(AbstractSimpleFastSearchCommand.class); @@ -670,7 +670,7 @@ final ResultItem item = createResultItem(document); searchResult.addResult(item); } catch (NullPointerException e) { - if (LOG.isDebugEnabled()) LOG.debug("Error finding document " + e); + LOG.debug("Error finding document", e); return searchResult; } } Modified: branches/2.13/result-spi/src/main/java/no/schibstedsok/searchportal/result/BasicSearchResultItem.java =================================================================== --- branches/2.13/result-spi/src/main/java/no/schibstedsok/searchportal/result/BasicSearchResultItem.java 2007-06-03 18:16:02 UTC 
(rev 5255) +++ branches/2.13/result-spi/src/main/java/no/schibstedsok/searchportal/result/BasicSearchResultItem.java 2007-06-03 19:14:25 UTC (rev 5256) @@ -48,7 +48,7 @@ */ public BasicSearchResultItem addField(final String field, final String value) { - fields.put(field, value); + fields.put(field, StringChopper.chop(value, -1)); return this; } @@ -60,12 +60,7 @@ public String getField(final String field) { final String fieldValue = (String) fields.get(field); - - if (fieldValue != null && (fieldValue.equals(" ") || fieldValue.equals(" "))) { - return null; - } else { - return fieldValue; - } + return fieldValue != null && fieldValue.trim().length() > 0 ? fieldValue : null; } /** @@ -110,15 +105,10 @@ public String getField(final String field, final int maxLength) { final String fieldValue = (String) fields.get(field); - - if (fieldValue != null) { - if (fieldValue.equals(" ")) { - return null; - } else { - return StringChopper.chop(fieldValue, maxLength); - } - } - return fieldValue; + + return fieldValue != null && fieldValue.trim().length() > 0 + ? StringChopper.chop(fieldValue, maxLength) + : null; } /** Returns a defensive copy of the field names existing in this resultItem. Modified: branches/2.13/result-spi/src/main/java/no/schibstedsok/searchportal/result/ResultItem.java =================================================================== --- branches/2.13/result-spi/src/main/java/no/schibstedsok/searchportal/result/ResultItem.java 2007-06-03 18:16:02 UTC (rev 5255) +++ branches/2.13/result-spi/src/main/java/no/schibstedsok/searchportal/result/ResultItem.java 2007-06-03 19:14:25 UTC (rev 5256) @@ -43,8 +43,10 @@ * There is no guarantee that this instance is altered. * This allows implementations to be immutable if they choose to be. * + * Use addObjectField to add a non-html string into the result. + * * @param name - * @param value + * @param value html formatted string. html to display must be escaped. 
* @return */ ResultItem addField(String name, String value); Modified: branches/2.13/result-spi/src/main/java/no/schibstedsok/searchportal/result/StringChopper.java =================================================================== --- branches/2.13/result-spi/src/main/java/no/schibstedsok/searchportal/result/StringChopper.java 2007-06-03 18:16:02 UTC (rev 5255) +++ branches/2.13/result-spi/src/main/java/no/schibstedsok/searchportal/result/StringChopper.java 2007-06-03 19:14:25 UTC (rev 5256) @@ -9,11 +9,9 @@ package no.schibstedsok.searchportal.result; import java.util.LinkedList; -import java.util.NoSuchElementException; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.apache.log4j.Logger; -import org.apache.commons.lang.StringEscapeUtils; /** My favourite dish of ChopSuey. * @@ -27,96 +25,133 @@ private static final Logger LOG = Logger.getLogger(StringChopper.class); private static final String DEBUG_CHOPSUEY = "Chopped it up to "; - private static final String MALFORMED_HTML_ESCAPING = "Malformed HTML. Escaping the entire thing: "; private static final Pattern openTag = Pattern.compile("<[^<]+>"); private static final Pattern closeTag = Pattern.compile("</[^<]+>"); private static final Pattern singleTag = Pattern.compile("<[^<]+/>"); - private static final Pattern la = Pattern.compile("<"); - private static final Pattern ra = Pattern.compile(">"); // Attributes ---------------------------------------------------- // Static -------------------------------------------------------- /** - * + * null safe. * @param s * @param length * @return */ public static String chop(final String s, final int length) { - final StringBuilder choppedString = new StringBuilder(); - - - if (s.length() <= length){ - choppedString.append(s); + if(null != s){ - } else { - - // chop the string first - choppedString.append(s.substring(0, length)); - - // if we chopped a tag in half remove the half left over. 
- int laCount = 0, raCount = 0, laOriginalCount = 0, raOriginalCount = 0; - for( Matcher m = la.matcher(choppedString); m.find(); ++laCount); - for( Matcher m = ra.matcher(choppedString); m.find(); ++raCount); - for( Matcher m = la.matcher(s); m.find(); ++laOriginalCount); - for( Matcher m = ra.matcher(s); m.find(); ++raOriginalCount); - // if we have more left than right arrows AND the original string was balanced - if( laCount > raCount && laOriginalCount == raOriginalCount){ - choppedString.setLength(choppedString.lastIndexOf("<")); + final StringBuilder choppedString = new StringBuilder(s); + + int laOriginalCount = 0, raOriginalCount = 0; + for(int i = 0; i < choppedString.length(); ++i){ + if( '<' == choppedString.charAt(i) ){ ++laOriginalCount; } + else if( '>' == choppedString.charAt(i) ){ ++raOriginalCount; } } - - // append the dot-dot-dot - switch( choppedString.length() >0 ? choppedString.charAt( choppedString.length() - 1 ) : ' '){ - case '.': - if( !choppedString.toString().endsWith("...")){ - if( choppedString.toString().endsWith("..")){ - choppedString.append('.'); - }else { - choppedString.append(".."); + + // if we have more left than right arrows + while(laOriginalCount > raOriginalCount){ + choppedString.append('>'); + ++raOriginalCount; + } + + if(length >= 0 && choppedString.length() > length){ + + // chop the string first + choppedString.setLength(length); + + // if we chopped a tag in half remove the half left over. + int laCount = 0, raCount = 0; + for(int i = 0; i < choppedString.length(); ++i){ + if( '<' == choppedString.charAt(i) ){ ++laCount; } + else if( '>' == choppedString.charAt(i) ){ ++raCount; } + } + + // if we have more left than right arrows + if( laCount > raCount ){ + choppedString.setLength(choppedString.lastIndexOf("<")); + } + + // append the dot-dot-dot + switch( choppedString.length() >0 ? 
choppedString.charAt( choppedString.length() - 1 ) : ' '){ + case '.': + final String toString = choppedString.toString(); + if( !toString.endsWith("...")){ + if( toString.endsWith("..")){ + choppedString.append('.'); + }else { + choppedString.append(".."); + } } - } - break; - default: - final int lastSpace = choppedString.lastIndexOf(" "); + break; + default: + final int lastSpace = choppedString.lastIndexOf(" "); - if (lastSpace >= 0) { - choppedString.setLength(lastSpace+1); + if (lastSpace >= 0) { + choppedString.setLength(lastSpace + 1); + } + choppedString.append("..."); + break; + } + + } + + if(0 < laOriginalCount){ + // balance opening tags if the chop happened inbetween open and close tags. + //LOG.debug("");LOG.debug("Balancing " + choppedString); + + final LinkedList<String> tags = new LinkedList<String>(); + final LinkedList<int[]> tagsToRemove = new LinkedList<int[]>(); + + final Matcher matcher = openTag.matcher(choppedString); + + while( matcher.find() ){ + if( closeTag.matcher(matcher.group()).find()) { + + if(tags.size() > 0 && matcher.group().equalsIgnoreCase(tags.getFirst().replaceFirst("<", "</"))){ + + //LOG.debug("Found closing tag " + matcher.group()); + tags.removeFirst(); + + }else{ + + // we've found a premature closing tag. remove it. + //LOG.debug("Found unmatched closing tag " + matcher.group()); + tagsToRemove.addFirst(new int[]{matcher.start(), matcher.end()}); + } + + }else if( singleTag.matcher(matcher.group()).find() ){ + + //LOG.debug("Ignoring single tag " + matcher.group()); + }else{ + + //LOG.debug("Found opening tag " + matcher.group()); + tags.addFirst(matcher.group()); } - choppedString.append("..."); - break; - } - + } - } + // remove tags that had no opening + for(int[] startEnd : tagsToRemove){ - // balance opening tags if the chop happened inbetween open and close tags. 
- final LinkedList<String> tags = new LinkedList<String>(); + //LOG.debug("Removing " + matcher.group()); + choppedString.delete(startEnd[0], startEnd[1]); + } - final Matcher matcher = openTag.matcher(choppedString); - while( matcher.find() ){ - if( closeTag.matcher(matcher.group()).find()) { - try { - tags.removeFirst(); - } catch (NoSuchElementException ex) { - LOG.warn(MALFORMED_HTML_ESCAPING + s); - return StringEscapeUtils.escapeHtml(s); - } - }else if( !singleTag.matcher(matcher.group()).find() ){ - tags.addFirst(matcher.group()); + // add tags to balance + for(String tag : tags){ + + //LOG.debug("Adding " + tag.replaceFirst("<", "</")); + choppedString.append(tag.replaceFirst("<", "</")); + } } + LOG.trace(DEBUG_CHOPSUEY + choppedString); + + return choppedString.toString(); } - - for(String tag : tags){ - choppedString.append(tag.replaceFirst("<","</")); - } - - LOG.trace(DEBUG_CHOPSUEY + choppedString); - - return choppedString.toString(); + return null; } // Constructors -------------------------------------------------- Property changes on: branches/2.13/result-spi/src/main/java/no/schibstedsok/searchportal/result/StringChopper.java ___________________________________________________________________ Name: svn:keywords + Id _______________________________________________ Kernel-commits mailing list [email protected] http://sesat.no/mailman/listinfo/kernel-commits
