burton 2004/09/30 11:00:33
Modified: feedparser/src/java/org/apache/commons/feedparser/locate
BlogService.java BlogServiceDiscovery.java
FeedLocator.java ProbeLocator.java
Log:
Fix for Yahoo Groups: detect groups.yahoo.com resources and probe their RSS feed URL.
Revision Changes Path
1.3 +68 -18
jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/BlogService.java
Index: BlogService.java
===================================================================
RCS file:
/home/cvs/jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/BlogService.java,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -r1.2 -r1.3
--- BlogService.java 23 Sep 2004 01:08:31 -0000 1.2
+++ BlogService.java 30 Sep 2004 18:00:32 -0000 1.3
@@ -23,27 +23,59 @@
* @author BradNeuberg
*/
public class BlogService {
- public static BlogService UNKNOWN = new BlogService(0, false);
- public static BlogService DIARYLAND = new BlogService(1, true);
- public static BlogService AOL_JOURNAL = new BlogService(2, true);
- public static BlogService PMACHINE = new BlogService(3, true);
- public static BlogService TEXTPATTERN = new BlogService(4, true);
+
+ public static BlogService UNKNOWN = new BlogService(0, false);
+ public static BlogService DIARYLAND = new BlogService(1, true);
+ public static BlogService AOL_JOURNAL = new BlogService(2, true);
+ public static BlogService PMACHINE = new BlogService(3, true);
+ public static BlogService TEXTPATTERN = new BlogService(4, true);
/* FIXME: We can't detect Manila sites right now. */
- public static BlogService MANILA = new BlogService(5, true);
- public static BlogService TYPEPAD = new BlogService(6, true);
- public static BlogService RADIO_USERLAND = new BlogService(7, true);
- public static BlogService LIVEJOURNAL = new BlogService(8, true);
- public static BlogService WORDPRESS = new BlogService(9, true);
+ public static BlogService MANILA = new BlogService(5, true);
+ public static BlogService TYPEPAD = new BlogService(6, true);
+ public static BlogService RADIO_USERLAND = new BlogService(7, true);
+ public static BlogService LIVEJOURNAL = new BlogService(8, true);
+ public static BlogService WORDPRESS = new BlogService(9, true);
/* FIXME: We can't detect iBlog sites right now. */
- public static BlogService IBLOG = new BlogService(10, true);
- public static BlogService XANGA = new BlogService(11, true);
- public static BlogService BLOSXOM = new BlogService(12, true);
- public static BlogService BLOGGER = new BlogService(13, true);
- public static BlogService MOVABLE_TYPE = new BlogService(14, true);
+ public static BlogService IBLOG = new BlogService(10, true);
+ public static BlogService XANGA = new BlogService(11, true);
+ public static BlogService BLOSXOM = new BlogService(12, true);
+ public static BlogService BLOGGER = new BlogService(13, true);
+ public static BlogService MOVABLE_TYPE = new BlogService(14, true);
/** FIXME: No way to detect Expression Engine weblogs right now. */
- public static BlogService EXPRESSION_ENGINE = new BlogService(15, true);
- public static BlogService GREYMATTER = new BlogService(16, true);
- public static BlogService TEXTAMERICA = new BlogService(17, false);
+ public static BlogService EXPRESSION_ENGINE = new BlogService(15, true);
+ public static BlogService GREYMATTER = new BlogService(16, true);
+ public static BlogService TEXTAMERICA = new BlogService(17, false);
+
+ public static BlogService YAHOOGROUPS = new BlogService(18, false) {
+
+ public String getFeedResource( String resource ) {
+
+// * Input: http://groups.yahoo.com/group/aggregators/
+// *
+// * Output: http://rss.groups.yahoo.com/group/aggregators/rss
+
+ if ( resource == null )
+ return null;
+
+ if ( resource.indexOf( "/group/" ) != -1 &&
+ resource.indexOf( "groups.yahoo.com" ) != -1 ) {
+
+ resource = "http://rss." +
+ resource.substring( "http://".length(), resource.length() )
+ ;
+
+ if ( resource.endsWith( "/" ) ) {
+ resource += "rss";
+ } else {
+ resource += "/rss";
+ }
+
+ }
+
+ return resource;
+ }
+
+ };
/** The type of BlogService this is, such as BlogService.BLOSXOM. */
private int type;
@@ -71,6 +103,24 @@
*/
public boolean hasValidAutodiscovery() {
return hasValidAutodiscovery;
+ }
+
+ /**
+ * Get the primary feed resource for this service to test.
+ *
+ * For example we're given the html URL and need to return the Feed URL to
+ * test.
+ *
+ * Input: http://groups.yahoo.com/group/aggregators/
+ *
+ * Output: http://rss.groups.yahoo.com/group/aggregators/rss
+ *
+ * If no changes are necessary just return the input resource.
+ *
+ * @author <a href="mailto:[EMAIL PROTECTED]">Kevin A. Burton</a>
+ */
+ public String getFeedResource( String resource ) {
+ return resource;
}
public String toString() {
1.3 +22 -5
jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/BlogServiceDiscovery.java
Index: BlogServiceDiscovery.java
===================================================================
RCS file:
/home/cvs/jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/BlogServiceDiscovery.java,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -r1.2 -r1.3
--- BlogServiceDiscovery.java 23 Sep 2004 01:08:31 -0000 1.2
+++ BlogServiceDiscovery.java 30 Sep 2004 18:00:32 -0000 1.3
@@ -56,6 +56,8 @@
public static BlogService discover( String resource ) {
return discoverService( resource, null );
}
+
+ // NOTE: ALL of this code should change to a visitor pattern. Bad design.
protected static BlogService discoverService( String resource,
String content ) {
@@ -107,20 +109,25 @@
else if (isManila(resource, content)) {
return BlogService.MANILA;
}*/
- else if (isRadioUserland(resource, content)) {
+ else if ( isRadioUserland( resource, content ) ) {
return BlogService.RADIO_USERLAND;
}
- else if (isTextPattern(resource, content)) {
+ else if ( isTextPattern( resource, content ) ) {
return BlogService.TEXTPATTERN;
}
- else if (isTextAmerica(resource, content)) {
+ else if ( isTextAmerica( resource, content ) ) {
return BlogService.TEXTAMERICA;
}
+ else if ( isYahooGroups( resource, content ) ) {
+ return BlogService.YAHOOGROUPS;
+ }
else {
return BlogService.UNKNOWN;
}
}
-
+
+ // **** vendor specific CMS detection code **********************************
+
protected static boolean isBlogger( String resource, String content ) {
boolean results = false;
@@ -275,7 +282,17 @@
return results;
}
-
+
+ protected static boolean isYahooGroups( String resource, String content ) {
+ boolean results = false;
+
+ results = containsDomain( resource, "groups.yahoo.com" );
+
+ return results;
+ }
+
+ // **** util code ***********************************************************
+
/** Determines if the given resource contains the given domain name
* fragment.
*/
1.21 +10 -5
jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/FeedLocator.java
Index: FeedLocator.java
===================================================================
RCS file:
/home/cvs/jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/FeedLocator.java,v
retrieving revision 1.20
retrieving revision 1.21
diff -u -r1.20 -r1.21
--- FeedLocator.java 23 Sep 2004 01:08:31 -0000 1.20
+++ FeedLocator.java 30 Sep 2004 18:00:32 -0000 1.21
@@ -104,7 +104,9 @@
//String resource = "file:///projects/feedparser/tests/locate5.html";
//String resource = "file:///projects/feedparser/tests/locate6.html";
- String resource = "file:///projects/feedparser/tests/locate8.html";
+ String resource = "http://groups.yahoo.com/group/aggregators/";
+
+ //String resource = "file:///projects/feedparser/tests/locate8.html";
//String resource = "http://blogs.sun.com/roller/page/gonzo";
@@ -126,22 +128,25 @@
//String resource = "http://www.corante.com/strange/";
//String resource = "http://peerfear.org";
+ ProbeLocator.BLOG_SERVICE_PROBING_ENABLED = true;
+ ProbeLocator.AGGRESIVE_PROBING_ENABLED = true;
+
FeedList l = locate( resource );
Iterator it = l.iterator();
if ( it.hasNext() == false ) {
- log.info( "NO LINKS FOUND" );
+ System.out.println( "NO LINKS FOUND" );
}
- log.info( " FIXME: (debug): AD RSS: " + l.getAdRSSFeed() );
- log.info( " FIXME: (debug): AD Atom: " + l.getAdAtomFeed() );
+ System.out.println( " FIXME: (debug): AD RSS: " + l.getAdRSSFeed() );
+ System.out.println( " FIXME: (debug): AD Atom: " + l.getAdAtomFeed() );
while ( it.hasNext() ) {
FeedReference ref = (FeedReference)it.next();
- log.info( ref.resource );
+ System.out.println( ref.resource );
}
1.14 +17 -2
jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/ProbeLocator.java
Index: ProbeLocator.java
===================================================================
RCS file:
/home/cvs/jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/ProbeLocator.java,v
retrieving revision 1.13
retrieving revision 1.14
diff -u -r1.13 -r1.14
--- ProbeLocator.java 23 Sep 2004 01:08:31 -0000 1.13
+++ ProbeLocator.java 30 Sep 2004 18:00:32 -0000 1.14
@@ -150,6 +150,9 @@
new FeedReference("index.xml", FeedReference.RSS_MEDIA_TYPE),
new FeedReference("xml/rss.xml", FeedReference.RSS_MEDIA_TYPE) };
+ FeedReference yahooGroupsLocations[] =
+ { new FeedReference( "", FeedReference.RSS_MEDIA_TYPE) };
+
probeMapping.put( BlogService.BLOSXOM, blosxomLocations );
//Tue Aug 31 2004 04:21 PM ([EMAIL PROTECTED]): Diaryland doesn't
@@ -169,8 +172,11 @@
probeMapping.put( BlogService.WORDPRESS, wordPressLocations );
probeMapping.put( BlogService.IBLOG, iBlogLocations );
probeMapping.put( BlogService.XANGA, xangaLocations);
+ probeMapping.put( BlogService.YAHOOGROUPS, yahooGroupsLocations);
+
probeMapping.put( BlogService.UNKNOWN, unknownLocations );
probeMapping.put( BlogService.TEXTAMERICA, textAmericaLocations );
+
}
/**
@@ -192,9 +198,9 @@
// clear out the list so far since we can't trust the results
list.clear();
}
-
+
if ( BLOG_SERVICE_PROBING_ENABLED || AGGRESIVE_PROBING_ENABLED ) {
-
+
String baseFeedPath = getFeedPath( resource );
FeedReference mapping[] = null;
@@ -212,10 +218,19 @@
// try out each mapping
for (int i = 0; i < mapping.length; i++) {
String pathToTest = baseFeedPath + mapping[i].resource;
+
+ //FIXME: generalize this in the future. We should NOT have
+ //custom tests here.
// we have to do special probing for Xanga
if ( blogService.equals( BlogService.XANGA ) ) {
pathToTest += getXangaUser(resource);
+ }
+
+ if ( blogService.equals( BlogService.YAHOOGROUPS ) ) {
+
+ pathToTest = BlogService.YAHOOGROUPS.getFeedResource(
resource );
+
}
log.info( "pathToTest = " + pathToTest );
---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]