burton 2004/10/17 17:00:09
Modified: feedparser/src/java/org/apache/commons/feedparser
BaseParser.java RSSFeedParser.java
Added: feedparser/tests/locale rss-2.0-en-on-channel-element.xml
rss-zh-on-channel-element.xml
Log:
More support for languages: this time RSS 2.0 and RSS 0.91 support via the dc:language
and 'language' elements.
Revision Changes Path
1.2 +23 -12
jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/BaseParser.java
Index: BaseParser.java
===================================================================
RCS file:
/home/cvs/jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/BaseParser.java,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- BaseParser.java 17 Oct 2004 23:43:23 -0000 1.1
+++ BaseParser.java 18 Oct 2004 00:00:09 -0000 1.2
@@ -49,13 +49,11 @@
if ( state.metaFeedParserlistener == null )
return;
- Attribute attr = getLocaleAttribute( element );
+ String l = getLocaleString( element );
- if ( attr != null ) {
+ if ( l != null ) {
- String v = attr.getValue();
-
- Locale locale = RFC3066LocaleParser.parse( v );
+ Locale locale = RFC3066LocaleParser.parse( l );
if ( locale != null )
state.metaFeedParserlistener.onLocale( state, locale );
@@ -72,23 +70,36 @@
if ( state.metaFeedParserlistener == null )
return;
- Attribute attr = getLocaleAttribute( element );
+ String l = getLocaleString( element );
- if ( attr != null )
+ if ( l != null )
state.metaFeedParserlistener.onLocaleEnd();
}
- protected static Attribute getLocaleAttribute( Element element ) {
+ protected static String getLocaleString( Element element ) {
//hm.. crap. how do we get the 'xml' namespace here?
Attribute attr = element.getAttribute( "lang" );
+ if ( attr != null )
+ return attr.getValue();
+
//when still null, see whether we have dc:language
- if ( attr == null )
- attr = element.getAttribute( "language", NS.DC );
- return attr;
+ Element lang = element.getChild( "language", NS.DC );
+
+ if ( lang != null )
+ return lang.getText();
+
+ //fall back to just using "language" and if it isn't a locale string we
+ //won't parse it. This is for RSS 0.91 and RSS 2.0 content.
+ lang = element.getChild( "language" );
+
+ if ( lang != null )
+ return lang.getText();
+
+ return null;
}
1.13 +12 -8
jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/RSSFeedParser.java
Index: RSSFeedParser.java
===================================================================
RCS file:
/home/cvs/jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/RSSFeedParser.java,v
retrieving revision 1.12
retrieving revision 1.13
diff -u -r1.12 -r1.13
--- RSSFeedParser.java 3 Sep 2004 19:46:47 -0000 1.12
+++ RSSFeedParser.java 18 Oct 2004 00:00:09 -0000 1.13
@@ -38,7 +38,7 @@
* @author <a href="mailto:[EMAIL PROTECTED]">Kevin A. Burton (burtonator)</a>
* @version $Id$
*/
-public class RSSFeedParser {
+public class RSSFeedParser extends BaseParser {
/**
* Parse the given document as an RSS document.
@@ -48,7 +48,7 @@
public static void parse( FeedParserListener listener,
org.jdom.Document doc ) throws Exception {
- FeedParserState state = new FeedParserState();
+ FeedParserState state = new FeedParserState( listener );
FeedVersion v = new FeedVersion();
v.isRSS = true;
@@ -60,7 +60,9 @@
XPath xpath = new XPath( "/descendant::*[local-name() = 'channel']" );
Element channel = (Element)xpath.selectSingleNode( doc );
state.current = channel;
- doParseChannel( listener, state );
+ doLocale( state, listener, channel );
+ doChannel( listener, state );
+ doLocaleEnd( state, listener, channel );
//*** now process the image. ***
xpath = new XPath( "/descendant::*[local-name() = 'image']" );
@@ -85,11 +87,13 @@
//update items.
while ( i.hasNext() ) {
- Element child = (Element)i.next();
+ Element item = (Element)i.next();
- state.current = child;
+ state.current = item;
+ doLocale( state, listener, item );
doParseItem( listener, state );
+ doLocaleEnd( state, listener, item );
}
@@ -102,8 +106,8 @@
*
* @author <a href="mailto:[EMAIL PROTECTED]">Kevin A. Burton</a>
*/
- private static void doParseChannel( FeedParserListener listener,
- FeedParserState state ) throws Exception {
+ private static void doChannel( FeedParserListener listener,
+ FeedParserState state ) throws Exception {
String link = getChildElementTextByName( state, "link" );
1.1
jakarta-commons-sandbox/feedparser/tests/locale/rss-2.0-en-on-channel-element.xml
Index: rss-2.0-en-on-channel-element.xml
===================================================================
<?xml version="1.0"?>
<!-- RSS generated by UserLand Frontier v9.0.1 on 10/17/2004; 2:40:35 PM Pacific -->
<rss version="2.0">
<channel>
<title>Scripting News</title>
<link>http://www.scripting.com/</link>
<description>It's even worse than it appears.</description>
<language>en-us</language>
<copyright>Copyright 1997-2004 Dave Winer</copyright>
<pubDate>Sun, 17 Oct 2004 07:00:00 GMT</pubDate>
<lastBuildDate>Sun, 17 Oct 2004 21:40:35 GMT</lastBuildDate>
<docs>http://blogs.law.harvard.edu/tech/rss</docs>
<generator>UserLand Frontier v9.0.1</generator>
<managingEditor>[EMAIL PROTECTED]</managingEditor>
<webMaster>[EMAIL PROTECTED]</webMaster>
<item>
<description><a
href="http://www.dawnanddrew.com/archives/001039.php">I just
listened</a> to my first Dawn &amp; Drew podcast. They're awesome. </description>
<pubDate>Sun, 17 Oct 2004 19:45:58 GMT</pubDate>
<guid>http://archive.scripting.com/2004/10/17#When:12:45:58PM</guid>
</item>
<item>
<description>If newspaper <a
href="http://www.command-post.org/2004/2_archives/016036.html">endorsements</a>
were votes, Kerry wins Florida in a landslide.</description>
<pubDate>Sun, 17 Oct 2004 21:40:33 GMT</pubDate>
<guid>http://archive.scripting.com/2004/10/17#When:2:40:33PM</guid>
</item>
<item>
<description><a
href="http://www.scripting.com/images/2001/09/15/usFlag.gif"
title="THINK!"><img
src="http://www.scripting.com/images/archiveScriptingCom/2004/05/31/think.gif"
height="59" width="69" border="0" hspace="15"
vspace="15" align="right" alt="THINK!"></a>Watching
<a href="http://frist.senate.gov/">Bill Frist</a>, Senate
Majority Leader, blame Kerry and Edwards for the failure of the Congress to pass tort
reform. I thought I was hearing Bush do the same thing in the last debate. I wonder
how many Americans know that Congress is controlled by the Republicans. They talk as
if there were an adversarial relationship between the legislative and executive
branches, which would lead people to draw the incorrect conclusion that the Democrats
are responsible. I think today we got a preview of the final onslaught of ads the
Republicans are going to run, and there won't be an opportunity to explain that the
Democrats don't run Congress. Like the lie that Saddam Hussein was in league with
Osama bin Laden, the Republicans don't mind if you draw the wrong conclusion, in fact,
they'll help you do it. </description>
<pubDate>Sun, 17 Oct 2004 17:44:57 GMT</pubDate>
<guid>http://archive.scripting.com/2004/10/17#When:10:44:57AM</guid>
</item>
<item>
<description><a
href="http://www.nytimes.com/2004/10/17/magazine/17BUSH.html?ei=5088&amp;en=6a9ce1d022952b10&amp;ex=1255752000&amp;partner=rssnyt&amp;pagewanted=print&amp;position=">NY
Times Magazine</a>, quoting a senior White House official, in 2002: "We're
an empire now, and when we act, we create our own reality."</description>
<pubDate>Sun, 17 Oct 2004 18:46:03 GMT</pubDate>
<guid>http://archive.scripting.com/2004/10/17#When:11:46:03AM</guid>
</item>
<item>
<description><a
href="http://www.bloggercon.org/2004/10/17#a1605">Adam Curry</a>:
"A lot of people have been questioning the use of licensed music in Podcasts and
I too feel that the time has come to face any legal ramifications of this audio wave
we're riding now, and not let it take us by surprise."</description>
<pubDate>Sun, 17 Oct 2004 17:24:24 GMT</pubDate>
<guid>http://archive.scripting.com/2004/10/17#When:10:24:24AM</guid>
</item>
<item>
<description><a
href="http://www.sfgate.com/cgi-bin/article.cgi?file=/chronicle/archive/2004/10/17/EDG8O98IQ01.DTL">Mitch
Kapor</a>: "We were never meant to have a highly centralized
government."</description>
<pubDate>Sun, 17 Oct 2004 17:21:02 GMT</pubDate>
<guid>http://archive.scripting.com/2004/10/17#When:10:21:02AM</guid>
</item>
<item>
<description><img
src="http://images.scripting.com/archiveScriptingCom/2004/10/17/lessig.jpg"
width="45" height="57" border="0"
align="right" hspace="15" vspace="5" alt="A picture
named lessig.jpg">Emailing with Larry Lessig today, he said something
surprising about <a href="http://creativecommons.org/">Creative
Commons</a>. "No author gives up his copyright when putting content under a
CC license. A CC license is just permissions given up front. It rests upon a copyright
(without the copyright, you couldn't impose the permissions). But the copyright owner
holds the copyright, and just says, 'here's how you're free to use my work.'"
</description>
<pubDate>Sun, 17 Oct 2004 15:57:45 GMT</pubDate>
<guid>http://archive.scripting.com/2004/10/17#When:8:57:45AM</guid>
</item>
<item>
<description>Doug Kaye <a
href="http://www.itconversations.com/shows/detail225.html">interview</a>
with Adam Curry.</description>
<pubDate>Sun, 17 Oct 2004 12:15:27 GMT</pubDate>
<guid>http://archive.scripting.com/2004/10/17#When:5:15:27AM</guid>
</item>
<item>
<description><a
href="http://www.iol.co.za/index.php?set_id=1&amp;click_id=2969">RSS news
feeds</a> from South Africa.</description>
<pubDate>Sun, 17 Oct 2004 14:21:26 GMT</pubDate>
<guid>http://archive.scripting.com/2004/10/17#When:7:21:26AM</guid>
<category>/Technology/Formats and Protocols/RSS</category>
</item>
<item>
<description><a
href="http://static.podcatch.com/manila/gems/un/eps.mp3">This is a
test</a>. For the next sixty seconds this station will conduct a test of the
Emergency Podcast System. </description>
<pubDate>Sun, 17 Oct 2004 17:36:38 GMT</pubDate>
<guid>http://archive.scripting.com/2004/10/17#When:10:36:38AM</guid>
<enclosure
url="http://static.podcatch.com/manila/gems/un/eps.mp3" length="189455"
type="audio/mpeg" />
</item>
<item>
<description><a
href="http://static.podcatch.com/manila/gems/un/anotherTestAudioBlogPost.mp3">I
got another</a> test blog post. An audio test blog post. Pay no attention to
the man behind the curtain.</description>
<pubDate>Sun, 17 Oct 2004 19:11:14 GMT</pubDate>
<guid>http://archive.scripting.com/2004/10/17#When:12:11:14PM</guid>
<enclosure
url="http://static.podcatch.com/manila/gems/un/anotherTestAudioBlogPost.mp3"
length="106423" type="audio/mpeg" />
</item>
<item>
<description><a
href="http://www.mediainfo.com/eandp/news/article_display.jsp?vnu_content_id=1000671941">Editor
&amp; Publisher</a> has a list of presidential endorsements.</description>
<pubDate>Sun, 17 Oct 2004 12:42:24 GMT</pubDate>
<guid>http://archive.scripting.com/2004/10/17#When:5:42:24AM</guid>
</item>
<item>
<description><a
href="http://www.undergroundclips.com/undergroundclips/2004/10/richard_clark_o.html">Undergroundclips</a>
has the 60 Minutes interview with Richard Clark.</description>
<pubDate>Sun, 17 Oct 2004 12:28:53 GMT</pubDate>
<guid>http://archive.scripting.com/2004/10/17#When:5:28:53AM</guid>
</item>
<item>
<description><a
href="http://www.nytimes.com/2004/10/17/arts/17rich.html?ex=1255752000&amp;en=ca7f76fa80642517&amp;ei=5088&amp;partner=rssnyt">Frank
Rich</a>: "Like the Nixon administration before it, the current White
House has kneecapped with impunity any news organization that challenges its
message."</description>
<pubDate>Sun, 17 Oct 2004 14:16:02 GMT</pubDate>
<guid>http://archive.scripting.com/2004/10/17#When:7:16:02AM</guid>
</item>
<item>
<description>The Boston Globe <a
href="http://www.boston.com/business/articles/2004/10/17/harvards12_billion_man/?rss_id=Boston%20Globe%20--%20Business%20News">profiles</a>
Jack Meyer, the investment banker who's in charge of Harvard's $22 billion
endowment.</description>
<pubDate>Sun, 17 Oct 2004 12:30:44 GMT</pubDate>
<guid>http://archive.scripting.com/2004/10/17#When:5:30:44AM</guid>
</item>
<item>
<description><img
src="http://images.scripting.com/archiveScriptingCom/2004/10/17/dubya.jpg"
width="45" height="61" border="0"
align="right" hspace="15" vspace="5" alt="A picture
named dubya.jpg">In a speech yesterday Bush said we will not have an
all-volunteer army. A few in the audience shouted, and he <a
href="http://www.turkishpress.com/turkishpress/news.asp?ID=30912">flipped</a>
it around. They chuckle when Bush makes a mistake, but what if Kerry had said it? Do
you think the Republicans would have mocked him? Yeah, I think so. I think the Dems
should run that flip-flop as an ad. Fair is fair. And unfair is fair in this
election.</description>
<pubDate>Sun, 17 Oct 2004 12:03:56 GMT</pubDate>
<guid>http://archive.scripting.com/2004/10/17#When:5:03:56AM</guid>
</item>
<item>
<description><a
href="http://jeremy.zawodny.com/blog/archives/002826.html">Jeremy
Zawodny</a>, who works at Yahoo, says MSNBC ripped them off. </description>
<pubDate>Sun, 17 Oct 2004 12:01:55 GMT</pubDate>
<guid>http://archive.scripting.com/2004/10/17#When:5:01:55AM</guid>
</item>
</channel>
</rss>
1.1
jakarta-commons-sandbox/feedparser/tests/locale/rss-zh-on-channel-element.xml
Index: rss-zh-on-channel-element.xml
===================================================================
<rss version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:trackback="http://madskills.com/public/xml/rss/module/trackback/"
xmlns:wfw="http://wellformedweb.org/CommentAPI/"
xmlns:slash="http://purl.org/rss/1.0/modules/slash/"><channel><title>beiqiao的专栏</title><link>http://blog.csdn.net/beiqiao/</link><description
/><dc:language>zh-CHS</dc:language><generator>.Text Version
0.958.2004.2001</generator><item><dc:creator>beiqiao</dc:creator><title>修复IE</title><link>http://blog.csdn.net/beiqiao/archive/2004/10/12/132900.aspx</link><pubDate>Tue,
12 Oct 2004 00:43:00
GMT</pubDate><guid>http://blog.csdn.net/beiqiao/archive/2004/10/12/132900.aspx</guid><wfw:comment>http://blog.csdn.net/beiqiao/comments/132900.aspx</wfw:comment><comments>http://blog.csdn.net/beiqiao/archive/2004/10/12/132900.aspx#Feedback</comments><slash:comments>0</slash:comments><wfw:commentRss>http://blog.csdn.net/beiqiao/comments/commentRss/132900.aspx</wfw:commentRss><trackback:ping>http://blog.csdn.net/beiqiao/services/trackbacks/132900.aspx</trackback:ping><description>发现中毒了,每次启动IE,会打开本地一个sp.html文件。这个文件放在windows临时目录下,即使被删除,IE启动后,又被生成。同时注册表中HKEY_LOCAL_MECHINE\Microsoft\Internet
Explorer\main\search bar、search page;HKEY_LOCAL_MECHINE\Microsoft\Internet
Explorer\search键值被修改成指向本地的sp.html文件,修改掉这些键值后,打开IE,又被改回来了。<img
src ="http://blog.csdn.net/beiqiao/aggbug/132900.aspx" width = "1" height = "1"
/></description></item><item><dc:creator>beiqiao</dc:creator><title>Linux相关命令</title><link>http://blog.csdn.net/beiqiao/archive/2004/10/11/131800.aspx</link><pubDate>Mon,
11 Oct 2004 10:55:00
GMT</pubDate><guid>http://blog.csdn.net/beiqiao/archive/2004/10/11/131800.aspx</guid><wfw:comment>http://blog.csdn.net/beiqiao/comments/131800.aspx</wfw:comment><comments>http://blog.csdn.net/beiqiao/archive/2004/10/11/131800.aspx#Feedback</comments><slash:comments>0</slash:comments><wfw:commentRss>http://blog.csdn.net/beiqiao/comments/commentRss/131800.aspx</wfw:commentRss><trackback:ping>http://blog.csdn.net/beiqiao/services/trackbacks/131800.aspx</trackback:ping><description>列出常用Linux命令以备查询<img
src ="http://blog.csdn.net/beiqiao/aggbug/131800.aspx" width = "1" height = "1"
/></description></item><item><dc:creator>beiqiao</dc:creator><title>使用Jmeter对Mysql进行压力测试无法执行多条sql语句问题</title><link>http://blog.csdn.net/beiqiao/archive/2004/10/10/130966.aspx</link><pubDate>Sun,
10 Oct 2004 16:49:00
GMT</pubDate><guid>http://blog.csdn.net/beiqiao/archive/2004/10/10/130966.aspx</guid><wfw:comment>http://blog.csdn.net/beiqiao/comments/130966.aspx</wfw:comment><comments>http://blog.csdn.net/beiqiao/archive/2004/10/10/130966.aspx#Feedback</comments><slash:comments>0</slash:comments><wfw:commentRss>http://blog.csdn.net/beiqiao/comments/commentRss/130966.aspx</wfw:commentRss><trackback:ping>http://blog.csdn.net/beiqiao/services/trackbacks/130966.aspx</trackback:ping><description>使用Jmeter对Mysql进行压力测试无法执行多条sql语句问题<img
src ="http://blog.csdn.net/beiqiao/aggbug/130966.aspx" width = "1" height = "1"
/></description></item></channel></rss>
---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]