vgritsenko 02/02/26 21:28:38 Modified: src/java/org/apache/cocoon/components/crawler SimpleCocoonCrawlerImpl.java src/java/org/apache/cocoon/components/search SimpleLuceneXMLIndexerImpl.java Log: fix issue with content type containing encoding Revision Changes Path 1.8 +9 -5 xml-cocoon2/src/java/org/apache/cocoon/components/crawler/SimpleCocoonCrawlerImpl.java Index: SimpleCocoonCrawlerImpl.java =================================================================== RCS file: /home/cvs/xml-cocoon2/src/java/org/apache/cocoon/components/crawler/SimpleCocoonCrawlerImpl.java,v retrieving revision 1.7 retrieving revision 1.8 diff -u -r1.7 -r1.8 --- SimpleCocoonCrawlerImpl.java 22 Feb 2002 07:00:06 -0000 1.7 +++ SimpleCocoonCrawlerImpl.java 27 Feb 2002 05:28:38 -0000 1.8 @@ -82,7 +82,7 @@ * A simple cocoon crawler. * * @author <a href="mailto:[EMAIL PROTECTED]">Bernhard Huber</a> - * @version CVS $Id: SimpleCocoonCrawlerImpl.java,v 1.7 2002/02/22 07:00:06 cziegeler Exp $ + * @version CVS $Id: SimpleCocoonCrawlerImpl.java,v 1.8 2002/02/27 05:28:38 vgritsenko Exp $ */ public class SimpleCocoonCrawlerImpl extends AbstractLoggable implements CocoonCrawler, Configurable, Disposable, Recyclable @@ -479,12 +479,16 @@ InputStream is = links_url_connection.getInputStream(); BufferedReader br = new BufferedReader(new InputStreamReader(is)); - String content_type = links_url_connection.getContentType(); + String contentType = links_url_connection.getContentType(); + int index = contentType.indexOf(';'); + if (contentType != null && index != -1) { + contentType = contentType.substring(0, index); + } if (getLogger().isDebugEnabled()) { - getLogger().debug("Content-type: " + content_type); + getLogger().debug("Content-type: " + contentType); } - if (content_type.equals(linkContentType)) { + if (contentType.equals(linkContentType)) { url_links = new ArrayList(); // content is supposed to be a list of links, @@ -598,7 +602,7 @@ * </p> * * @author <a href="mailto:[EMAIL PROTECTED]>Bernhard 
Huber</a> - * @version $Id: SimpleCocoonCrawlerImpl.java,v 1.7 2002/02/22 07:00:06 cziegeler Exp $ + * @version $Id: SimpleCocoonCrawlerImpl.java,v 1.8 2002/02/27 05:28:38 vgritsenko Exp $ */ public static class CocoonCrawlerIterator implements Iterator { 1.8 +7 -1 xml-cocoon2/src/java/org/apache/cocoon/components/search/SimpleLuceneXMLIndexerImpl.java Index: SimpleLuceneXMLIndexerImpl.java =================================================================== RCS file: /home/cvs/xml-cocoon2/src/java/org/apache/cocoon/components/search/SimpleLuceneXMLIndexerImpl.java,v retrieving revision 1.7 retrieving revision 1.8 diff -u -r1.7 -r1.8 --- SimpleLuceneXMLIndexerImpl.java 22 Feb 2002 07:00:12 -0000 1.7 +++ SimpleLuceneXMLIndexerImpl.java 27 Feb 2002 05:28:38 -0000 1.8 @@ -97,7 +97,7 @@ * A simple class building lucene documents from xml content. * * @author <a href="mailto:[EMAIL PROTECTED]">Bernhard Huber</a> - * @version CVS $Id: SimpleLuceneXMLIndexerImpl.java,v 1.7 2002/02/22 07:00:12 cziegeler Exp $ + * @version CVS $Id: SimpleLuceneXMLIndexerImpl.java,v 1.8 2002/02/27 05:28:38 vgritsenko Exp $ */ public class SimpleLuceneXMLIndexerImpl extends AbstractLoggable implements LuceneXMLIndexer, Configurable, Composable, ThreadSafe @@ -175,6 +175,12 @@ + CONTENT_QUERY); URLConnection contentURLConnection = contentURL.openConnection(); String contentType = contentURLConnection.getContentType(); + + int index = contentType.indexOf(';'); + if (contentType != null && index != -1) { + contentType = contentType.substring(0, index); + } + if (contentType != null && allowedContentType.contains(contentType)) {
---------------------------------------------------------------------- In case of trouble, e-mail: [EMAIL PROTECTED] To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]