burton 2004/08/05 18:12:13
Modified: feedparser/src/java/org/apache/commons/feedparser/locate
FeedLocator.java LinkLocator.java
Log:
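We're now a bit more aggressive about link location: LinkLocator always runs after
discovery, and ProbeLocator is only used as a fallback when nothing was found. We also
try to handle RSS formats correctly and prefer feeds with richer metadata.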
Revision Changes Path
1.9 +12 -8
jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/FeedLocator.java
Index: FeedLocator.java
===================================================================
RCS file:
/home/cvs/jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/FeedLocator.java,v
retrieving revision 1.8
retrieving revision 1.9
diff -u -r1.8 -r1.9
--- FeedLocator.java 4 Aug 2004 22:17:01 -0000 1.8
+++ FeedLocator.java 6 Aug 2004 01:12:12 -0000 1.9
@@ -64,14 +64,13 @@
DiscoveryLocator.locate( resource, content, list );
- //this failed... try probe location
- //FIXME: if we still fail try location link probing /index.rdf, /index.xml
- if ( list.size() == 0 )
- ProbeLocator.locate( resource, content, list );
-
//this failed... try looking for links
+ LinkLocator.locate( resource, content, list );
+
+ //this failed... try probe location. This is more reliable than
+ //LinkLocation but requires a few more HTTP gets.
if ( list.size() == 0 )
- LinkLocator.locate( resource, content, list );
+ ProbeLocator.locate( resource, content, list );
//FIXME: if we fail to locate with link discovery.
@@ -87,12 +86,14 @@
public static void main( String[] args ) throws Exception {
//This should find http://www.electoral-vote.com/index.rss
- String resource = "http://www.electoral-vote.com/";
+ //String resource = "http://brendonwilson.com/";
+
+ String resource = "file:///projects/feedparser/tests/locate4.html";
//String resource = "http://www.corante.com/strange/";
//String resource = "http://peerfear.org";
- List l = locate( resource );
+ FeedList l = locate( resource );
Iterator it = l.iterator();
@@ -100,6 +101,9 @@
System.out.println( "NO LINKS FOUND" );
}
+ System.out.println( " FIXME: (debug): AD RSS: " + l.getAdRSSFeed() );
+ System.out.println( " FIXME: (debug): AD Atom: " + l.getAdAtomFeed() );
+
while ( it.hasNext() ) {
FeedReference ref = (FeedReference)it.next();
1.4 +54 -4
jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/LinkLocator.java
Index: LinkLocator.java
===================================================================
RCS file:
/home/cvs/jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/LinkLocator.java,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -r1.3 -r1.4
--- LinkLocator.java 4 Aug 2004 22:17:02 -0000 1.3
+++ LinkLocator.java 6 Aug 2004 01:12:12 -0000 1.4
@@ -40,6 +40,13 @@
final FeedList list )
throws Exception {
+ /**
+ * When we have been given feeds at a higher level (via <link rel> tags)
+ * we should prefer these.
+ */
+ final boolean hasExplicitRSSFeed = list.getAdRSSFeed() != null;
+ final boolean hasExplicitAtomFeed = list.getAdRSSFeed() != null;
+
AnchorParserListener listener = new AnchorParserListener() {
String resource = null;
@@ -48,6 +55,9 @@
HashSet seen = new HashSet();
+ boolean hasFoundRSSFeed = false;
+ boolean hasFoundAtomFeed = false;
+
public void setContext( Object context ) {
resource = (String)context;
@@ -60,11 +70,13 @@
public Object getResult() {
return list;
}
-
+
public boolean onAnchor( String href, String rel, String title ) {
String current = ResourceExpander.expand( resource, href );
+ System.out.println( " FIXME: (debug): current: " + current );
+
if ( current == null )
return true; //obviously not
@@ -103,8 +115,13 @@
FeedReference ref = new FeedReference( current,
FeedReference.RSS_MEDIA_TYPE );
+ //Make sure to preserve existing AD feeds first.
+ if ( ! hasExplicitRSSFeed )
+ list.setAdRSSFeed( ref );
+
list.add( ref );
- list.setAdRSSFeed( ref );
+
+ hasFoundRSSFeed = true;
}
@@ -113,16 +130,49 @@
FeedReference ref = new FeedReference( current,
FeedReference.ATOM_MEDIA_TYPE );
+ //Make sure to preserve existing AD feeds first.
+ if ( ! hasExplicitAtomFeed )
+ list.setAdAtomFeed( ref );
+
list.add( ref );
- list.setAdAtomFeed( ref );
+
+ hasFoundAtomFeed = true;
}
if ( current.endsWith( ".xml" ) ||
current.endsWith( ".rdf" ) ) {
+ //NOTE that we do allow autodiscovery for index.xml
+ //and index.rdf files but we don't prefer them since
+ //these extensions are generic. We would prefer to use
+ //index.rss or even Atom (though people tend to use Atom
+ //autodiscovery now). This is important because if we
+ //spit back an index.xml file that's NOT RSS or worse an
+ //index.rdf file that's FOAF then we might break callers.
+
+ FeedReference ref = new FeedReference( current,
+
FeedReference.ATOM_MEDIA_TYPE );
+
+ //see if we should RESORT to using this.
+
+ if ( ! hasExplicitRSSFeed && ! hasFoundRSSFeed ) {
+
+ //NOTE: when we have found an existing RDF file use
+ //that instead.. This is probably RSS 1.0 which is
+ //much better than RSS 0.91
+
+ if ( list.getAdRSSFeed() == null ||
+ list.getAdRSSFeed().resource.endsWith( ".rdf" ) ==
false ) {
+
+ list.setAdRSSFeed( ref );
+
+ }
+
+ }
+
//feed for this blog.
- list.add( current );
+ list.add( ref );
return true;
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]