Author: burton
Date: Sun Feb 6 00:29:56 2005
New Revision: 151555
URL: http://svn.apache.org/viewcvs?view=rev&rev=151555
Log:
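Fixed a potential bug with accented text in XML: XMLCleanser.isXMLCharacter rejected characters in the 0x80-0xFE range, dropping Latin accented characters (worried about regressions from this change).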
Modified:
jakarta/commons/sandbox/feedparser/trunk/TODO
jakarta/commons/sandbox/feedparser/trunk/src/java/org/apache/commons/feedparser/FeedParserImpl.java
jakarta/commons/sandbox/feedparser/trunk/src/java/org/apache/commons/feedparser/network/NetworkException.java
jakarta/commons/sandbox/feedparser/trunk/src/java/org/apache/commons/feedparser/tools/XMLCleanser.java
jakarta/commons/sandbox/feedparser/trunk/xdocs/navigation.xml
Modified: jakarta/commons/sandbox/feedparser/trunk/TODO
URL:
http://svn.apache.org/viewcvs/jakarta/commons/sandbox/feedparser/trunk/TODO?view=diff&r1=151554&r2=151555
==============================================================================
--- jakarta/commons/sandbox/feedparser/trunk/TODO (original)
+++ jakarta/commons/sandbox/feedparser/trunk/TODO Sun Feb 6 00:29:56 2005
@@ -3,12 +3,15 @@
- Get viewcvs linked to the app
- - Nightly builds
+ - Nightly builds?
- 0.5 public release
http://jakarta.apache.org/commons/releases/index.html
+
+
+
- maven?
- (DONE) All FeedParser exceptions should include the URL of the feed if
@@ -26,6 +29,16 @@
- (DONE) Rework the factory mechanism to support multiple FeedParsers... should
be an interface.
+
+- How do I want to maintain a public changelog?
+
+- How do I want to maintain a public TODO?
+
+- Fix the feedparsing bug where we'll drop chars:
+
+ current-broken-drop-accents.atom
+
+ The bug is in getCorrectInputStream
- Networking layer should support per-request UserAgent settings. This should
just be a request header I think
Modified:
jakarta/commons/sandbox/feedparser/trunk/src/java/org/apache/commons/feedparser/FeedParserImpl.java
URL:
http://svn.apache.org/viewcvs/jakarta/commons/sandbox/feedparser/trunk/src/java/org/apache/commons/feedparser/FeedParserImpl.java?view=diff&r1=151554&r2=151555
==============================================================================
---
jakarta/commons/sandbox/feedparser/trunk/src/java/org/apache/commons/feedparser/FeedParserImpl.java
(original)
+++
jakarta/commons/sandbox/feedparser/trunk/src/java/org/apache/commons/feedparser/FeedParserImpl.java
Sun Feb 6 00:29:56 2005
@@ -106,10 +106,10 @@
if (encoding == null)
encoding = "UTF-8";
- if (encoding.startsWith("UTF")) {
+ if ( encoding.startsWith( "UTF" ) ) {
- String result = XMLCleanser.cleanse(bytes, encoding);
- bytes = FeedFilter.parse(result, encoding);
+ String result = XMLCleanser.cleanse( bytes, encoding );
+ bytes = FeedFilter.parse( result, encoding );
} else {
@@ -120,7 +120,7 @@
//remove prefix whitespace, intern HTML entities, etc.
//build an input stream from the our bytes for parsing...
- is = new ByteArrayInputStream(bytes);
+ is = new ByteArrayInputStream( bytes );
return is;
Modified:
jakarta/commons/sandbox/feedparser/trunk/src/java/org/apache/commons/feedparser/network/NetworkException.java
URL:
http://svn.apache.org/viewcvs/jakarta/commons/sandbox/feedparser/trunk/src/java/org/apache/commons/feedparser/network/NetworkException.java?view=diff&r1=151554&r2=151555
==============================================================================
---
jakarta/commons/sandbox/feedparser/trunk/src/java/org/apache/commons/feedparser/network/NetworkException.java
(original)
+++
jakarta/commons/sandbox/feedparser/trunk/src/java/org/apache/commons/feedparser/network/NetworkException.java
Sun Feb 6 00:29:56 2005
@@ -22,7 +22,7 @@
/**
*
* @author <a href="mailto:[EMAIL PROTECTED]">Kevin A. Burton</a>
- * @version $Id: NetworkException.java,v 1.1 2005/01/25 07:55:19 burton Exp $
+ * @version $Id$
*/
public class NetworkException extends IOException {
@@ -109,6 +109,17 @@
public int getResponseCode() {
+ //FIXME:
+ // java.lang.NumberFormatException: For input string: "fie"
+ // at java.lang.NumberFormatException.forInputString(NumberFormatException.java:48)
+ // at java.lang.Integer.parseInt(Integer.java:468)
+ // at java.lang.Integer.parseInt(Integer.java:518)
+ // at org.peerfear.newsmonster.network.NetworkException.getResponseCode(NetworkException.java:142)
+ // at ksa.robot.FeedTask._doTaskLogFailure(FeedTask.java:264)
+ // at ksa.robot.FeedTask.run(FeedTask.java:202)
+ // at ksa.robot.TaskThread.doProcessTask(TaskThread.java:298)
+ // at ksa.robot.TaskThread.run(TaskThread.java:111)
+
if ( _urlConnection == null ) {
return -1;
}
Modified:
jakarta/commons/sandbox/feedparser/trunk/src/java/org/apache/commons/feedparser/tools/XMLCleanser.java
URL:
http://svn.apache.org/viewcvs/jakarta/commons/sandbox/feedparser/trunk/src/java/org/apache/commons/feedparser/tools/XMLCleanser.java?view=diff&r1=151554&r2=151555
==============================================================================
---
jakarta/commons/sandbox/feedparser/trunk/src/java/org/apache/commons/feedparser/tools/XMLCleanser.java
(original)
+++
jakarta/commons/sandbox/feedparser/trunk/src/java/org/apache/commons/feedparser/tools/XMLCleanser.java
Sun Feb 6 00:29:56 2005
@@ -20,10 +20,10 @@
* Class that can cleanse a string so that nothing can be present to break an
* XML parser. This is a VERY non-portable class as it is meant to work just
* with Xalan/Xerces and may remove more text and replace things that are
- * non-XML centric.
+ * non-XML centric.
*
* @author <a href="mailto:[EMAIL PROTECTED]">Kevin A. Burton</a>
- * @version $Id: XMLCleanser.java,v 1.2 2004/09/03 19:46:47 burton Exp $
+ * @version $Id$
*/
public class XMLCleanser {
@@ -125,26 +125,43 @@
}
/*
- * This is a utility function for determining whether a specified
- * character is a character according to production 2 of the
- * XML 1.0 specification.
+ * This is a utility function for determining whether a specified character
+ * is a character according to production 2 of the XML 1.0 specification.
*
* @param c <code>char</code> to check for XML compliance.
- * @return <code>boolean</code> - true if it's a character,
- * false otherwise.
+
+ * @return <code>boolean</code> - true if it's a character, false otherwise.
*/
- public static boolean isXMLCharacter(char c) {
+ public static boolean isXMLCharacter( char c ) {
+ // A parsed entity contains text, a sequence of characters, which may
+ // represent markup or character data. A character is an atomic unit of
+ // text as specified by ISO/IEC 10646 [ISO/IEC 10646]. Legal characters
+ // are tab, carriage return, line feed, and the legal graphic characters
+ // of Unicode and ISO/IEC 10646. The use of "compatibility characters",
+ // as defined in section 6.8 of [Unicode], is discouraged.
+
+ // [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] |
+ // [#x10000-#x10FFFF] /* any Unicode character, excluding the surrogate
+ // blocks, FFFE, and FFFF. */
+
if (c == '\n') return true;
if (c == '\r') return true;
if (c == '\t') return true;
+
+ //NOTE: this was BROKEN! The range between 0x80 and 0xFF is valid XML
+ //and would end up dropping latin characters in UTF-8. Why did I want
+ //to return false here again?
- if (c < 0x20) return false; if (c < 0x80) return true;
- if (c < 0xFF) return false; if (c <= 0xD7FF) return true;
+ //if (c < 0x20) return false; if (c < 0x80) return true;
+ //if (c < 0xFF) return false; if (c <= 0xD7FF) return true;
+
+ if (c < 0x20) return false; if (c <= 0xD7FF) return true;
if (c < 0xE000) return false; if (c <= 0xFFFD) return true;
if (c < 0x10000) return false; if (c <= 0x10FFFF) return true;
return false;
+
}
}
Modified: jakarta/commons/sandbox/feedparser/trunk/xdocs/navigation.xml
URL:
http://svn.apache.org/viewcvs/jakarta/commons/sandbox/feedparser/trunk/xdocs/navigation.xml?view=diff&r1=151554&r2=151555
==============================================================================
--- jakarta/commons/sandbox/feedparser/trunk/xdocs/navigation.xml (original)
+++ jakarta/commons/sandbox/feedparser/trunk/xdocs/navigation.xml Sun Feb 6 00:29:56 2005
@@ -19,6 +19,10 @@
<item name="Wiki"
href="http://wiki.apache.org/jakarta-commons/FeedParser" />
+
+ <item name="ViewCVS"
+ href="http://svn.apache.org/viewcvs.cgi/jakarta/commons/sandbox/feedparser/trunk" />
+
</menu>
&common-menus;
</body>
---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]