burton 2004/08/13 16:17:19
Modified: feedparser/src/java/org/apache/commons/feedparser/locate
DiscoveryLocator.java
feedparser/src/java/org/apache/commons/feedparser/test
TestFeedLocator.java
Log:
Attribute order is no longer required ... more unit tests...
Revision Changes Path
1.11 +49 -6
jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/DiscoveryLocator.java
Index: DiscoveryLocator.java
===================================================================
RCS file:
/home/cvs/jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/DiscoveryLocator.java,v
retrieving revision 1.10
retrieving revision 1.11
diff -u -r1.10 -r1.11
--- DiscoveryLocator.java 4 Aug 2004 22:17:01 -0000 1.10
+++ DiscoveryLocator.java 13 Aug 2004 23:17:18 -0000 1.11
@@ -28,9 +28,18 @@
*/
public class DiscoveryLocator {
- //NOTE: this will break if the attributes aren't in the right order.
- static Pattern pattern =
- Pattern.compile(
"<link[^>]+type=[\"']([^\"']+)[\"'][^>]+href=[\"']([^\"']+)" );
+ /**
+ * Get a FULL link within the content. We then pull the attributes out of
+ * this.
+ */
+ static Pattern element_pattern =
+ Pattern.compile( "<link[^>]+" );
+
+ /**
+ * Regex to match a single quoted name="value" attribute (group 1 = name, group 2 = value).
+ */
+ static Pattern attr_pattern =
+ Pattern.compile( "([a-zA-Z]+)=[\"']([^\"']+)[\"']" );
static HashSet mediatypes = new HashSet();
@@ -58,16 +67,27 @@
//elements forward until I discover </head>. Also note that this isn't
//doing all feed URLs just the first ones it finds.
- Matcher m = pattern.matcher( content );
+ Matcher m = element_pattern.matcher( content );
while( m.find() ) {
- String type=m.group( 1 );
+ //the value of the link element XML... example:
+
+ // <link rel="alternate"
+ // href="http://www.codinginparadise.org/weblog/atom.xml"
+ // type="application/atom+xml"
+ // title="ATOM" />
+
+ String element = m.group( 0 );
+
+ HashMap attributes = getAttributes( element );
+
+ String type = (String)attributes.get( "type" );
if ( mediatypes.contains( type ) ) {
//expand the href
- String href = m.group( 2 );
+ String href = (String)attributes.get( "href" );
href = ResourceExpander.expand( resource, href );
FeedReference feedReference = new FeedReference( href, type );
@@ -85,6 +105,29 @@
}
return list;
+
+ }
+
+ public static HashMap getAttributes( String link ) {
+
+ HashMap map = new HashMap();
+
+ Matcher m = attr_pattern.matcher( link );
+
+ int index = 0;
+
+ while ( m.find( index ) ) {
+
+ String name = m.group( 1 );
+ String value = m.group( 2 );
+
+ map.put( name, value );
+
+ index = m.end();
+
+ }
+
+ return map;
}
1.2 +10 -7
jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/test/TestFeedLocator.java
Index: TestFeedLocator.java
===================================================================
RCS file:
/home/cvs/jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/test/TestFeedLocator.java,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- TestFeedLocator.java 13 Aug 2004 21:53:57 -0000 1.1
+++ TestFeedLocator.java 13 Aug 2004 23:17:19 -0000 1.2
@@ -60,15 +60,18 @@
throw new Exception( "NO LINKS FOUND" );
}
+ System.out.println( "Atom: " + l.getAdAtomFeed() );
+ System.out.println( "RSS: " + l.getAdRSSFeed() );
+
}
public void test1() throws Exception {
-// doTest( "file:///projects/feedparser/tests/locate1.html" );
-// doTest( "file:///projects/feedparser/tests/locate2.html" );
-// doTest( "file:///projects/feedparser/tests/locate3.html" );
-// doTest( "file:///projects/feedparser/tests/locate4.html" );
-// doTest( "file:///projects/feedparser/tests/locate5.html" );
-// doTest( "file:///projects/feedparser/tests/locate6.html" );
+ doTest( "file:///projects/feedparser/tests/locate1.html" );
+ doTest( "file:///projects/feedparser/tests/locate2.html" );
+ doTest( "file:///projects/feedparser/tests/locate3.html" );
+ doTest( "file:///projects/feedparser/tests/locate4.html" );
+ doTest( "file:///projects/feedparser/tests/locate5.html" );
+ doTest( "file:///projects/feedparser/tests/locate6.html" );
doTest( "file:///projects/feedparser/tests/locate7.html" );
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]