Here is another patch to the Feed Parser. It updates ProbeLocator.java to better handle Xanga, BlogSpot, and Blosxom blogs, and updates TestProbeLocator.java to improve the unit tests.

Brad Neuberg

Patch:

? compile
? feedparser_9_7_2004.patch
? lib
Index: build.xml
===================================================================
RCS file: /home/cvspublic/jakarta-commons-sandbox/feedparser/build.xml,v
retrieving revision 1.7
diff -r1.7 build.xml
31d30
< <!--
37d35
< -->
124d121
< <test name="org.apache.commons.feedparser.test.TestFeedFilter"/>
Index: src/java/org/apache/commons/feedparser/locate/ProbeLocator.java
===================================================================
RCS file: /home/cvspublic/jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/ProbeLocator.java,v
retrieving revision 1.11
diff -r1.11 ProbeLocator.java
290,291c290,291
< * "myweblog.php".
< *
---
> * "myweblog.php".
> *
304c304
< protected static String getFeedPath( String resource )
---
> protected static String getFeedPath(String resource)
308,316c308,313
< int end = resource.lastIndexOf( "#" );
<
< if ( end != -1 )
< resource = resource.substring( 0, end );
<
< end = resource.lastIndexOf( "?" );
<
< if ( end != -1 )
< resource = resource.substring( 0, end );
---
> if ( resource.indexOf("#") != -1 ) {
> resource = resource.substring( 0, resource.indexOf("#") );
> }
> else if ( resource.indexOf("?") != -1 ) {
> resource = resource.substring( 0, resource.indexOf("?") );
> }
318c315
< if ( ! resource.endsWith( "blosxom.cgi" ) ) {
---
> if (!resource.endsWith("blosxom.cgi")) {
320c317,321
< resource = fileMatcher.replaceAll("");
---
> if (fileMatcher.find()) {
> String stringToStrip = fileMatcher.group(1);
> int startStrip = resource.indexOf(stringToStrip);
> resource = resource.substring(0, startStrip);
> }
322,323c323,324
<
< if ( ! resource.endsWith( "/" ) ) {
---
>
> if ( !resource.endsWith( "/" ) ) {
329,337c330
<
< public static void main( String[] args ) throws Exception {
<
< System.out.println( "asdf" );
< System.out.println( getFeedPath( "http://foo.com/bar?cat=dog" ) );
< System.out.println( getFeedPath( "http://foo.com/bar?cat=dog#adf" ) );
<
< }
<
---
>
354c347
<
---
>
366a360,376
> }
>
> public static void main( String[] args ) throws Exception {
>
> FeedList list = new FeedList();
>
> locate( "http://davebarry.blogspot.com/", null, list );
> locate( "http://www.livejournal.com/users/jwz", null, list );
>
> Iterator it = list.iterator();
>
> while ( it.hasNext() ) {
>
> log.info( it.next() );
>
> }
>
Index: src/java/org/apache/commons/feedparser/test/TestProbeLocator.java
===================================================================
RCS file: /home/cvspublic/jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/test/TestProbeLocator.java,v
retrieving revision 1.4
diff -r1.4 TestProbeLocator.java
31c31
< * @version $Id: TestProbeLocator.java,v 1.4 2004/09/05 22:01:33 burton Exp $
---
> * @version $Id: TestProbeLocator.java,v 1.2 2004/09/02 00:36:25 burton Exp $
252,323d251
< // // This site should have an RSS feed
< // resource = "http://www.xanga.com/home.aspx?user=lithium98";;
< // content = getContent(resource);
< // assertNotNull(content);
< // blogService = BlogServiceDiscovery.discover(resource, content);
< // assertEquals(blogService, BlogService.XANGA);
< // list = new FeedList();
< // ProbeLocator.locate(resource, content, list);
< // assertEquals(list.size(), 1);
< // feeds = (FeedReference[])list.toArray(new FeedReference[list.size()]);
< // assertEquals(feeds.length, 1);
< // assertEquals(feeds[0].method, FeedReference.METHOD_PROBE_DISCOVERY);
< // assertNull(feeds[0].title, null);
< // assertEquals(feeds[0].type, FeedReference.RSS_MEDIA_TYPE);
< // assertEquals(feeds[0].resource, "http://www.xanga.com/rss.aspx?user=lithium98";);
< // /* test through the FeedLocator */
< // list = FeedLocator.locate(resource);
< // atomFeed = list.getAdAtomFeed();
< // rssFeed = list.getAdRSSFeed();
< // assertNull(atomFeed);
< // assertNotNull(rssFeed);
< // assertEquals(rssFeed.type, FeedReference.RSS_MEDIA_TYPE);
< // assertEquals(rssFeed.resource, "http://www.xanga.com/rss.aspx?user=lithium98";);
<
< // // This site should have an RSS feed
< // resource = "http://www.xanga.com/home.aspx?user=ChUnSA_86";;
< // content = getContent(resource);
< // assertNotNull(content);
< // blogService = BlogServiceDiscovery.discover(resource, content);
< // assertEquals(blogService, BlogService.XANGA);
< // list = new FeedList();
< // ProbeLocator.locate(resource, content, list);
< // assertEquals(list.size(), 1);
< // feeds = (FeedReference[])list.toArray(new FeedReference[list.size()]);
< // assertEquals(feeds.length, 1);
< // assertEquals(feeds[0].method, FeedReference.METHOD_PROBE_DISCOVERY);
< // assertNull(feeds[0].title, null);
< // assertEquals(feeds[0].type, FeedReference.RSS_MEDIA_TYPE);
< // assertEquals(feeds[0].resource, "http://www.xanga.com/rss.aspx?user=ChUnSA_86";);
< // /* test through the FeedLocator */
< // list = FeedLocator.locate(resource);
< // atomFeed = list.getAdAtomFeed();
< // rssFeed = list.getAdRSSFeed();
< // assertNull(atomFeed);
< // assertNotNull(rssFeed);
< // assertEquals(rssFeed.type, FeedReference.RSS_MEDIA_TYPE);
< // assertEquals(rssFeed.resource, "http://www.xanga.com/rss.aspx?user=ChUnSA_86";);
<
< // // This site should have an RSS feed
< // resource = "http://www.xanga.com/home.aspx?user=wdfphillz";;
< // content = getContent(resource);
< // assertNotNull(content);
< // blogService = BlogServiceDiscovery.discover(resource, content);
< // assertEquals(blogService, BlogService.XANGA);
< // list = new FeedList();
< // ProbeLocator.locate(resource, content, list);
< // assertEquals(list.size(), 1);
< // feeds = (FeedReference[])list.toArray(new FeedReference[list.size()]);
< // assertEquals(feeds.length, 1);
< // assertEquals(feeds[0].method, FeedReference.METHOD_PROBE_DISCOVERY);
< // assertNull(feeds[0].title, null);
< // assertEquals(feeds[0].type, FeedReference.RSS_MEDIA_TYPE);
< // assertEquals(feeds[0].resource, "http://www.xanga.com/rss.aspx?user=wdfphillz";);
< // /* test through the FeedLocator */
< // list = FeedLocator.locate(resource);
< // atomFeed = list.getAdAtomFeed();
< // rssFeed = list.getAdRSSFeed();
< // assertNull(atomFeed);
< // assertNotNull(rssFeed);
< // assertEquals(rssFeed.type, FeedReference.RSS_MEDIA_TYPE);
< // assertEquals(rssFeed.resource, "http://www.xanga.com/rss.aspx?user=wdfphillz";);
<
325,326c253
<
< resource = "http://xanga.com/home.aspx?user=joe";;
---
> resource = "http://www.xanga.com/home.aspx?user=lithium98";
348c275
<
---
>
372c299
<
---
>
396c323
<
---
>
453,479d379
< public static void main( String[] args ) throws Exception {
<
< TestProbeLocator test = new TestProbeLocator( null );
<
< test.test( "http://xanga.com/home.aspx?user=joe";,
< BlogService.XANGA,
< 1 );
<
< test.test( "http://www.xanga.com/home.aspx?user=joe";,
< BlogService.XANGA,
< 1 );
<
< /*test.testBlogger();
< test.testLiveJournal();
< test.testDiaryLand();
< test.testMovableType();*/
< //test.testXanga();
< /*test.testWordPress();
< test.testAOLJournal();
< test.testTypePad();
< test.testGreyMatter();
< test.testPMachine();
< test.testBlosxom();
< test.testRadioUserland();
< test.testTextPattern();*/
< }
<
759,878d658
<
< // This site should have a single RSS feed
< resource = "http://bamph.com";;
< content = getContent(resource);
< assertNotNull(content);
< blogService = BlogServiceDiscovery.discover(resource, content);
< assertEquals(blogService, BlogService.UNKNOWN);
< list = new FeedList();
< ProbeLocator.locate(resource, content, list);
< assertEquals(list.size(), 1);
< feeds = (FeedReference[])list.toArray(new FeedReference[list.size()]);
< assertEquals(feeds.length, 1);
< assertEquals(feeds[0].method, FeedReference.METHOD_PROBE_DISCOVERY);
< assertNull(feeds[0].title, null);
< assertEquals(feeds[0].type, FeedReference.RSS_MEDIA_TYPE);
< assertEquals(feeds[0].resource, "http://bamph.com/index.xml";);
< /* test through the FeedLocator */
< list = FeedLocator.locate(resource);
< atomFeed = list.getAdAtomFeed();
< rssFeed = list.getAdRSSFeed();
< assertNull(atomFeed);
< assertNotNull(rssFeed);
< assertEquals(rssFeed.type, FeedReference.RSS_MEDIA_TYPE);
< assertEquals(rssFeed.resource, "http://bamph.com/index.xml";);
<
< // This site should have a single RSS feed
< resource = "http://bamph.com";;
< content = getContent(resource);
< assertNotNull(content);
< blogService = BlogServiceDiscovery.discover(resource, content);
< assertEquals(blogService, BlogService.UNKNOWN);
< list = new FeedList();
< ProbeLocator.locate(resource, content, list);
< assertEquals(list.size(), 1);
< feeds = (FeedReference[])list.toArray(new FeedReference[list.size()]);
< assertEquals(feeds.length, 1);
< assertEquals(feeds[0].method, FeedReference.METHOD_PROBE_DISCOVERY);
< assertNull(feeds[0].title, null);
< assertEquals(feeds[0].type, FeedReference.RSS_MEDIA_TYPE);
< assertEquals(feeds[0].resource, "http://bamph.com/index.xml";);
< /* test through the FeedLocator */
< list = FeedLocator.locate(resource);
< atomFeed = list.getAdAtomFeed();
< rssFeed = list.getAdRSSFeed();
< assertNull(atomFeed);
< assertNotNull(rssFeed);
< assertEquals(rssFeed.type, FeedReference.RSS_MEDIA_TYPE);
< assertEquals(rssFeed.resource, "http://bamph.com/index.xml";);
<
< // This site should have a single RSS feed
< resource = "http://bamph.com";;
< content = getContent(resource);
< assertNotNull(content);
< blogService = BlogServiceDiscovery.discover(resource, content);
< assertEquals(blogService, BlogService.UNKNOWN);
< list = new FeedList();
< ProbeLocator.locate(resource, content, list);
< assertEquals(list.size(), 1);
< feeds = (FeedReference[])list.toArray(new FeedReference[list.size()]);
< assertEquals(feeds.length, 1);
< assertEquals(feeds[0].method, FeedReference.METHOD_PROBE_DISCOVERY);
< assertNull(feeds[0].title, null);
< assertEquals(feeds[0].type, FeedReference.RSS_MEDIA_TYPE);
< assertEquals(feeds[0].resource, "http://bamph.com/index.xml";);
< /* test through the FeedLocator */
< list = FeedLocator.locate(resource);
< atomFeed = list.getAdAtomFeed();
< rssFeed = list.getAdRSSFeed();
< assertNull(atomFeed);
< assertNotNull(rssFeed);
< assertEquals(rssFeed.type, FeedReference.RSS_MEDIA_TYPE);
< assertEquals(rssFeed.resource, "http://bamph.com/index.xml";);
<
< // This site should have a single RSS feed
< resource = "http://bamph.com";;
< content = getContent(resource);
< assertNotNull(content);
< blogService = BlogServiceDiscovery.discover(resource, content);
< assertEquals(blogService, BlogService.UNKNOWN);
< list = new FeedList();
< ProbeLocator.locate(resource, content, list);
< assertEquals(list.size(), 1);
< feeds = (FeedReference[])list.toArray(new FeedReference[list.size()]);
< assertEquals(feeds.length, 1);
< assertEquals(feeds[0].method, FeedReference.METHOD_PROBE_DISCOVERY);
< assertNull(feeds[0].title, null);
< assertEquals(feeds[0].type, FeedReference.RSS_MEDIA_TYPE);
< assertEquals(feeds[0].resource, "http://bamph.com/index.xml";);
< /* test through the FeedLocator */
< list = FeedLocator.locate(resource);
< atomFeed = list.getAdAtomFeed();
< rssFeed = list.getAdRSSFeed();
< assertNull(atomFeed);
< assertNotNull(rssFeed);
< assertEquals(rssFeed.type, FeedReference.RSS_MEDIA_TYPE);
< assertEquals(rssFeed.resource, "http://bamph.com/index.xml";);
<
< // This site should have a single RSS feed
< resource = "http://bamph.com";;
< content = getContent(resource);
< assertNotNull(content);
< blogService = BlogServiceDiscovery.discover(resource, content);
< assertEquals(blogService, BlogService.UNKNOWN);
< list = new FeedList();
< ProbeLocator.locate(resource, content, list);
< assertEquals(list.size(), 1);
< feeds = (FeedReference[])list.toArray(new FeedReference[list.size()]);
< assertEquals(feeds.length, 1);
< assertEquals(feeds[0].method, FeedReference.METHOD_PROBE_DISCOVERY);
< assertNull(feeds[0].title, null);
< assertEquals(feeds[0].type, FeedReference.RSS_MEDIA_TYPE);
< assertEquals(feeds[0].resource, "http://bamph.com/index.xml";);
< /* test through the FeedLocator */
< list = FeedLocator.locate(resource);
< atomFeed = list.getAdAtomFeed();
< rssFeed = list.getAdRSSFeed();
< assertNull(atomFeed);
< assertNotNull(rssFeed);
< assertEquals(rssFeed.type, FeedReference.RSS_MEDIA_TYPE);
< assertEquals(rssFeed.resource, "http://bamph.com/index.xml";);
954c734
<
---
>
1029,1054d808
< public static void main( String[] args ) throws Exception {
<
< TestProbeLocator test = new TestProbeLocator( null );
<
< /*test.test( "http://xanga.com/home.aspx?user=joe";,
< BlogService.XANGA,
< 1 );
<
< test.test( "http://www.xanga.com/home.aspx?user=joe";,
< BlogService.XANGA,
< 1 );*/
<
< test.testBlogger();
< test.testLiveJournal();
< test.testDiaryLand();
< test.testMovableType();
< test.testXanga();
< test.testWordPress();
< test.testAOLJournal();
< test.testTypePad();
< test.testGreyMatter();
< test.testPMachine();
< test.testBlosxom();
< test.testRadioUserland();
< test.testTextPattern();
< }
1079,1104d832
< public static void main( String[] args ) throws Exception {
<
< TestProbeLocator test = new TestProbeLocator( null );
<
< /*test.test( "http://xanga.com/home.aspx?user=joe";,
< BlogService.XANGA,
< 1 );
<
< test.test( "http://www.xanga.com/home.aspx?user=joe";,
< BlogService.XANGA,
< 1 );*/
<
< test.testBlogger();
< test.testLiveJournal();
< test.testDiaryLand();
< test.testMovableType();
< test.testXanga();
< test.testWordPress();
< test.testAOLJournal();
< test.testTypePad();
< test.testGreyMatter();
< test.testPMachine();
< test.testBlosxom();
< test.testRadioUserland();
< test.testTextPattern();
< }
1129,1154d856
< public static void main( String[] args ) throws Exception {
<
< TestProbeLocator test = new TestProbeLocator( null );
<
< /*test.test( "http://xanga.com/home.aspx?user=joe";,
< BlogService.XANGA,
< 1 );
<
< test.test( "http://www.xanga.com/home.aspx?user=joe";,
< BlogService.XANGA,
< 1 );*/
<
< test.testBlogger();
< test.testLiveJournal();
< test.testDiaryLand();
< test.testMovableType();
< test.testXanga();
< test.testWordPress();
< test.testAOLJournal();
< test.testTypePad();
< test.testGreyMatter();
< test.testPMachine();
< test.testBlosxom();
< test.testRadioUserland();
< test.testTextPattern();
< }
1179,1204d880
< public static void main( String[] args ) throws Exception {
<
< TestProbeLocator test = new TestProbeLocator( null );
<
< /*test.test( "http://xanga.com/home.aspx?user=joe";,
< BlogService.XANGA,
< 1 );
<
< test.test( "http://www.xanga.com/home.aspx?user=joe";,
< BlogService.XANGA,
< 1 );*/
<
< test.testBlogger();
< test.testLiveJournal();
< test.testDiaryLand();
< test.testMovableType();
< test.testXanga();
< test.testWordPress();
< test.testAOLJournal();
< test.testTypePad();
< test.testGreyMatter();
< test.testPMachine();
< test.testBlosxom();
< test.testRadioUserland();
< test.testTextPattern();
< }
1229,1254d904
< public static void main( String[] args ) throws Exception {
<
< TestProbeLocator test = new TestProbeLocator( null );
<
< /*test.test( "http://xanga.com/home.aspx?user=joe";,
< BlogService.XANGA,
< 1 );
<
< test.test( "http://www.xanga.com/home.aspx?user=joe";,
< BlogService.XANGA,
< 1 );*/
<
< test.testBlogger();
< test.testLiveJournal();
< test.testDiaryLand();
< test.testMovableType();
< test.testXanga();
< test.testWordPress();
< test.testAOLJournal();
< test.testTypePad();
< test.testGreyMatter();
< test.testPMachine();
< test.testBlosxom();
< test.testRadioUserland();
< test.testTextPattern();
< }
1355c1005
<
---
>




Reply via email to