burton 2004/09/22 18:08:31
Modified: feedparser/src/java/org/apache/commons/feedparser
FeedList.java
feedparser/src/java/org/apache/commons/feedparser/locate
BlogService.java BlogServiceDiscovery.java
FeedLocator.java ProbeLocator.java
Log:
Commit of Brad's TextAmerica patches...
Revision Changes Path
1.3 +7 -1
jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/FeedList.java
Index: FeedList.java
===================================================================
RCS file:
/home/cvs/jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/FeedList.java,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -r1.2 -r1.3
--- FeedList.java 20 Aug 2004 21:44:06 -0000 1.2
+++ FeedList.java 23 Sep 2004 01:08:31 -0000 1.3
@@ -104,6 +104,12 @@
setAdRSSFeed( ref );
}
+
+ public void clear() {
+ super.clear();
+ this.adAtomFeed = null;
+ this.adRSSFeed = null;
+ }
}
1.2 +37 -20
jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/BlogService.java
Index: BlogService.java
===================================================================
RCS file:
/home/cvs/jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/BlogService.java,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- BlogService.java 31 Aug 2004 23:57:31 -0000 1.1
+++ BlogService.java 23 Sep 2004 01:08:31 -0000 1.2
@@ -23,48 +23,63 @@
* @author BradNeuberg
*/
public class BlogService {
- public static BlogService UNKNOWN = new BlogService(0);
- public static BlogService DIARYLAND = new BlogService(1);
- public static BlogService AOL_JOURNAL = new BlogService(2);
- public static BlogService PMACHINE = new BlogService(3);
- public static BlogService TEXTPATTERN = new BlogService(4);
+ public static BlogService UNKNOWN = new BlogService(0, false);
+ public static BlogService DIARYLAND = new BlogService(1, true);
+ public static BlogService AOL_JOURNAL = new BlogService(2, true);
+ public static BlogService PMACHINE = new BlogService(3, true);
+ public static BlogService TEXTPATTERN = new BlogService(4, true);
/* FIXME: We can't detect Manila sites right now. */
- public static BlogService MANILA = new BlogService(5);
- public static BlogService TYPEPAD = new BlogService(6);
- public static BlogService RADIO_USERLAND = new BlogService(7);
- public static BlogService LIVEJOURNAL = new BlogService(8);
- public static BlogService WORDPRESS = new BlogService(9);
+ public static BlogService MANILA = new BlogService(5, true);
+ public static BlogService TYPEPAD = new BlogService(6, true);
+ public static BlogService RADIO_USERLAND = new BlogService(7, true);
+ public static BlogService LIVEJOURNAL = new BlogService(8, true);
+ public static BlogService WORDPRESS = new BlogService(9, true);
/* FIXME: We can't detect iBlog sites right now. */
- public static BlogService IBLOG = new BlogService(10);
- public static BlogService XANGA = new BlogService(11);
- public static BlogService BLOSXOM = new BlogService(12);
- public static BlogService BLOGGER = new BlogService(13);
- public static BlogService MOVABLE_TYPE = new BlogService(14);
+ public static BlogService IBLOG = new BlogService(10, true);
+ public static BlogService XANGA = new BlogService(11, true);
+ public static BlogService BLOSXOM = new BlogService(12, true);
+ public static BlogService BLOGGER = new BlogService(13, true);
+ public static BlogService MOVABLE_TYPE = new BlogService(14, true);
/** FIXME: No way to detect Expression Engine weblogs right now. */
- public static BlogService EXPRESSION_ENGINE = new BlogService(15);
- public static BlogService GREYMATTER = new BlogService(16);
+ public static BlogService EXPRESSION_ENGINE = new BlogService(15, true);
+ public static BlogService GREYMATTER = new BlogService(16, true);
+ public static BlogService TEXTAMERICA = new BlogService(17, false);
/** The type of BlogService this is, such as BlogService.BLOSXOM. */
private int type;
+ /** Whether we can trust the results of this blog service's autodiscovery
+ * links; for example, TextAmerica returns invalid autodiscovery results.
+ */
+ private boolean hasValidAutodiscovery = false;
+
/** A private constructor to help us do type-safe enumeration. Only called
* from within this class.
*/
- private BlogService(int type) {
+ private BlogService(int type, boolean hasValidAutodiscovery) {
this.type = type;
+ this.hasValidAutodiscovery = hasValidAutodiscovery;
}
public int getType() {
return type;
}
+ /** Returns whether we can trust the results of this blog service's
+ * autodiscovery links. For example, TextAmerica returns invalid
+ * autodiscovery results.
+ */
+ public boolean hasValidAutodiscovery() {
+ return hasValidAutodiscovery;
+ }
+
public String toString() {
// use reflection to get the type string; useful so we don't have to
// maintain a list of types here. Since this is only used for debugging
// purposes its okay to use reflection.
try {
Field fields[] = getClass().getDeclaredFields();
- BlogService compareMe = new BlogService(type);
+ BlogService compareMe = new BlogService(type, hasValidAutodiscovery);
for (int i = 0; i < fields.length; i++) {
// make sure we are dealing with one of our BlogService constants
if (fields[i].getType().equals(this.getClass())) {
@@ -91,7 +106,9 @@
BlogService compareMe = (BlogService)obj;
- return compareMe.getType() == this.type;
+ // we don't need to check the hasValidAutodiscovery value since equality
+ // is determined only by the type
+ return (compareMe.getType() == this.type);
}
public int hashCode() {
1.2 +11 -0
jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/BlogServiceDiscovery.java
Index: BlogServiceDiscovery.java
===================================================================
RCS file:
/home/cvs/jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/BlogServiceDiscovery.java,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- BlogServiceDiscovery.java 31 Aug 2004 23:57:31 -0000 1.1
+++ BlogServiceDiscovery.java 23 Sep 2004 01:08:31 -0000 1.2
@@ -113,6 +113,9 @@
else if (isTextPattern(resource, content)) {
return BlogService.TEXTPATTERN;
}
+ else if (isTextAmerica(resource, content)) {
+ return BlogService.TEXTAMERICA;
+ }
else {
return BlogService.UNKNOWN;
}
@@ -261,6 +264,14 @@
Matcher blosxomMatcher = blosxomPattern.matcher(content);
results = blosxomMatcher.find();
+
+ return results;
+ }
+
+ protected static boolean isTextAmerica( String resource, String content ) {
+ boolean results = false;
+
+ results = containsDomain(resource, "textamerica.com");
return results;
}
1.20 +6 -8
jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/FeedLocator.java
Index: FeedLocator.java
===================================================================
RCS file:
/home/cvs/jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/FeedLocator.java,v
retrieving revision 1.19
retrieving revision 1.20
diff -u -r1.19 -r1.20
--- FeedLocator.java 2 Sep 2004 00:36:25 -0000 1.19
+++ FeedLocator.java 23 Sep 2004 01:08:31 -0000 1.20
@@ -88,10 +88,8 @@
//this failed... try probe location. This is more reliable than
//LinkLocation but requires a few more HTTP gets.
- if ( list.size() == 0 ) {
- log.info( "Using ProbeLocator..." );
- ProbeLocator.locate( resource, content, list );
- }
+ log.info( "Using ProbeLocator..." );
+ ProbeLocator.locate( resource, content, list );
return list;
@@ -133,17 +131,17 @@
Iterator it = l.iterator();
if ( it.hasNext() == false ) {
- System.out.println( "NO LINKS FOUND" );
+ log.info( "NO LINKS FOUND" );
}
- System.out.println( " FIXME: (debug): AD RSS: " + l.getAdRSSFeed() );
- System.out.println( " FIXME: (debug): AD Atom: " + l.getAdAtomFeed() );
+ log.info( " FIXME: (debug): AD RSS: " + l.getAdRSSFeed() );
+ log.info( " FIXME: (debug): AD Atom: " + l.getAdAtomFeed() );
while ( it.hasNext() ) {
FeedReference ref = (FeedReference)it.next();
- System.out.println( ref.resource );
+ log.info( ref.resource );
}
1.13 +24 -9
jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/ProbeLocator.java
Index: ProbeLocator.java
===================================================================
RCS file:
/home/cvs/jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/locate/ProbeLocator.java,v
retrieving revision 1.12
retrieving revision 1.13
diff -u -r1.12 -r1.13
--- ProbeLocator.java 14 Sep 2004 01:32:04 -0000 1.12
+++ ProbeLocator.java 23 Sep 2004 01:08:31 -0000 1.13
@@ -101,7 +101,8 @@
{ new FeedReference("atom.xml", FeedReference.ATOM_MEDIA_TYPE) };
FeedReference aolJournalLocations[] =
- { new FeedReference("rss.xml", FeedReference.RSS_MEDIA_TYPE) };
+ { new FeedReference("atom.xml", FeedReference.ATOM_MEDIA_TYPE),
+ new FeedReference("rss.xml", FeedReference.RSS_MEDIA_TYPE) };
FeedReference pmachineLocations[] =
{ new FeedReference("index.xml", FeedReference.RSS_MEDIA_TYPE) };
@@ -138,6 +139,9 @@
FeedReference xangaLocations[] =
{ new FeedReference("rss.aspx?user=", FeedReference.RSS_MEDIA_TYPE) };
+ FeedReference textAmericaLocations[] =
+ { new FeedReference("rss.aspx", FeedReference.RSS_MEDIA_TYPE) };
+
FeedReference unknownLocations[] =
{ new FeedReference("atom.xml",FeedReference.ATOM_MEDIA_TYPE),
new FeedReference("index.rss", FeedReference.RSS_MEDIA_TYPE),
@@ -166,6 +170,7 @@
probeMapping.put( BlogService.IBLOG, iBlogLocations );
probeMapping.put( BlogService.XANGA, xangaLocations);
probeMapping.put( BlogService.UNKNOWN, unknownLocations );
+ probeMapping.put( BlogService.TEXTAMERICA, textAmericaLocations );
}
/**
@@ -175,12 +180,21 @@
public static final List locate( String resource, String content, FeedList list
)
throws Exception {
+ // determine what blog service we are dealing with
+ BlogService blogService = BlogServiceDiscovery.discover( resource, content
);
+
+ // fail-fast if we already have some results and if we determine that
+ // we can trust the results (TextAmerica has invalid autodiscovery,
+ // for example)
+ if ( list.size() > 0 && blogService.hasValidAutodiscovery() )
+ return list;
+ else if ( blogService.hasValidAutodiscovery() == false ) {
+ // clear out the list so far since we can't trust the results
+ list.clear();
+ }
+
if ( BLOG_SERVICE_PROBING_ENABLED || AGGRESIVE_PROBING_ENABLED ) {
-
- // determine what blog service we are dealing with
-
- BlogService blogService = BlogServiceDiscovery.discover( resource,
content );
-
+
String baseFeedPath = getFeedPath( resource );
FeedReference mapping[] = null;
@@ -207,6 +221,7 @@
log.info( "pathToTest = " + pathToTest );
if ( feedExists( pathToTest ) ) {
+ log.info("Feed exists");
FeedReference feedReference = new FeedReference( pathToTest,
mapping[i].type );
feedReference.method = FeedReference.METHOD_PROBE_DISCOVERY;
@@ -333,9 +348,8 @@
public static void main( String[] args ) throws Exception {
- System.out.println( "asdf" );
- System.out.println( getFeedPath( "http://foo.com/bar?cat=dog" ) );
- System.out.println( getFeedPath( "http://foo.com/bar?cat=dog#adf" ) );
+ log.info( getFeedPath( "http://foo.com/bar?cat=dog" ) );
+ log.info( getFeedPath( "http://foo.com/bar?cat=dog#adf" ) );
}
@@ -355,6 +369,7 @@
request.getContentLength();
long response = request.getResponseCode();
+ log.info("response="+response);
return response == 200;
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]