Author: rdonkin
Date: Mon Jul 23 12:54:08 2007
New Revision: 558847
URL: http://svn.apache.org/viewvc?view=rev&rev=558847
Log:
Pull parser patch https://issues.apache.org/jira/browse/MIME4J-19. Contributed
by Jochen Wiedmann.
Added:
james/mime4j/trunk/src/main/java/org/apache/james/mime4j/MimeTokenStream.java
james/mime4j/trunk/src/site/apt/
james/mime4j/trunk/src/site/apt/usage.apt
Modified:
james/mime4j/trunk/pom.xml
james/mime4j/trunk/src/main/java/org/apache/james/mime4j/MimeBoundaryInputStream.java
james/mime4j/trunk/src/main/java/org/apache/james/mime4j/MimeStreamParser.java
james/mime4j/trunk/src/site/site.xml
james/mime4j/trunk/src/test/java/org/apache/james/mime4j/MimeStreamParserTest.java
Modified: james/mime4j/trunk/pom.xml
URL:
http://svn.apache.org/viewvc/james/mime4j/trunk/pom.xml?view=diff&rev=558847&r1=558846&r2=558847
==============================================================================
--- james/mime4j/trunk/pom.xml (original)
+++ james/mime4j/trunk/pom.xml Mon Jul 23 12:54:08 2007
@@ -288,8 +288,7 @@
<artifactId>maven-javadoc-plugin</artifactId>
</plugin>
<plugin>
- <groupId>org.codehaus.mojo</groupId>
- <artifactId>jxr-maven-plugin</artifactId>
+ <artifactId>maven-jxr-plugin</artifactId>
</plugin>
</plugins>
</reporting>
Modified:
james/mime4j/trunk/src/main/java/org/apache/james/mime4j/MimeBoundaryInputStream.java
URL:
http://svn.apache.org/viewvc/james/mime4j/trunk/src/main/java/org/apache/james/mime4j/MimeBoundaryInputStream.java?view=diff&rev=558847&r1=558846&r2=558847
==============================================================================
---
james/mime4j/trunk/src/main/java/org/apache/james/mime4j/MimeBoundaryInputStream.java
(original)
+++
james/mime4j/trunk/src/main/java/org/apache/james/mime4j/MimeBoundaryInputStream.java
Mon Jul 23 12:54:08 2007
@@ -36,11 +36,11 @@
*/
public class MimeBoundaryInputStream extends InputStream {
- private PushbackInputStream s = null;
- private byte[] boundary = null;
+ private PushbackInputStream s;
+ private byte[] boundary;
private boolean first = true;
- private boolean eof = false;
- private boolean parenteof = false;
+ private boolean eof;
+ private boolean parenteof;
private boolean moreParts = true;
/**
Modified:
james/mime4j/trunk/src/main/java/org/apache/james/mime4j/MimeStreamParser.java
URL:
http://svn.apache.org/viewvc/james/mime4j/trunk/src/main/java/org/apache/james/mime4j/MimeStreamParser.java?view=diff&rev=558847&r1=558846&r2=558847
==============================================================================
---
james/mime4j/trunk/src/main/java/org/apache/james/mime4j/MimeStreamParser.java
(original)
+++
james/mime4j/trunk/src/main/java/org/apache/james/mime4j/MimeStreamParser.java
Mon Jul 23 12:54:08 2007
@@ -21,13 +21,6 @@
import java.io.IOException;
import java.io.InputStream;
-import java.util.BitSet;
-import java.util.LinkedList;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.james.mime4j.decoder.Base64InputStream;
-import org.apache.james.mime4j.decoder.QuotedPrintableInputStream;
/**
* <p>
@@ -50,30 +43,8 @@
* @version $Id: MimeStreamParser.java,v 1.8 2005/02/11 10:12:02 ntherning Exp
$
*/
public class MimeStreamParser {
- private static final Log log = LogFactory.getLog(MimeStreamParser.class);
-
- private static BitSet fieldChars = null;
-
- private RootInputStream rootStream = null;
- private LinkedList bodyDescriptors = new LinkedList();
private ContentHandler handler = null;
- private boolean raw = false;
-
- static {
- fieldChars = new BitSet();
- for (int i = 0x21; i <= 0x39; i++) {
- fieldChars.set(i);
- }
- for (int i = 0x3b; i <= 0x7e; i++) {
- fieldChars.set(i);
- }
- }
-
- /**
- * Creates a new <code>MimeStreamParser</code> instance.
- */
- public MimeStreamParser() {
- }
+ private final MimeTokenStream mimeTokenStream = new MimeTokenStream();
/**
* Parses a stream of bytes containing a MIME message.
@@ -83,8 +54,56 @@
* @throws IOException on I/O errors.
*/
public void parse(InputStream is) throws MimeException, IOException {
- rootStream = new RootInputStream(is);
- parseMessage(rootStream);
+ mimeTokenStream.parse(is);
+ OUTER: for (;;) {
+ int state = mimeTokenStream.getState();
+ switch (state) {
+ case MimeTokenStream.T_BODY:
+ handler.body(mimeTokenStream.getBodyDescriptor(),
mimeTokenStream.getInputStream());
+ break;
+ case MimeTokenStream.T_END_BODYPART:
+ handler.endBodyPart();
+ break;
+ case MimeTokenStream.T_END_HEADER:
+ handler.endHeader();
+ break;
+ case MimeTokenStream.T_END_MESSAGE:
+ handler.endMessage();
+ break;
+ case MimeTokenStream.T_END_MULTIPART:
+ handler.endMultipart();
+ break;
+ case MimeTokenStream.T_END_OF_STREAM:
+ break OUTER;
+ case MimeTokenStream.T_EPILOGUE:
+ handler.epilogue(mimeTokenStream.getInputStream());
+ break;
+ case MimeTokenStream.T_FIELD:
+ handler.field(mimeTokenStream.getField());
+ break;
+ case MimeTokenStream.T_PREAMBLE:
+ handler.preamble(mimeTokenStream.getInputStream());
+ break;
+ case MimeTokenStream.T_RAW_ENTITY:
+ handler.raw(mimeTokenStream.getInputStream());
+ break;
+ case MimeTokenStream.T_START_BODYPART:
+ handler.startBodyPart();
+ break;
+ case MimeTokenStream.T_START_HEADER:
+ handler.startHeader();
+ break;
+ case MimeTokenStream.T_START_MESSAGE:
+ handler.startMessage();
+ break;
+ case MimeTokenStream.T_START_MULTIPART:
+
handler.startMultipart(mimeTokenStream.getBodyDescriptor());
+ break;
+ default:
+ throw new IllegalStateException("Invalid state: " + state);
+ }
+ state = mimeTokenStream.next();
+ }
}
/**
@@ -95,7 +114,7 @@
* @see #setRaw(boolean)
*/
public boolean isRaw() {
- return raw;
+ return mimeTokenStream.isRaw();
}
/**
@@ -109,7 +128,7 @@
* disables it.
*/
public void setRaw(boolean raw) {
- this.raw = raw;
+ mimeTokenStream.setRaw(raw);
}
/**
@@ -125,193 +144,7 @@
* {@link ContentHandler#startMessage()}, etc.
*/
public void stop() {
- rootStream.truncate();
- }
-
- /**
- * Parses an entity which consists of a header followed by a body
containing
- * arbitrary data, body parts or an embedded message.
- *
- * @param is the stream to parse.
- * @throws MimeException if the entity can not be processed
- * @throws IOException on I/O errors.
- */
- private void parseEntity(InputStream is) throws MimeException, IOException
{
- BodyDescriptor bd = parseHeader(is);
-
- if (bd.isMultipart()) {
- bodyDescriptors.addFirst(bd);
-
- handler.startMultipart(bd);
-
- MimeBoundaryInputStream tempIs =
- new MimeBoundaryInputStream(is, bd.getBoundary());
- handler.preamble(new CloseShieldInputStream(tempIs));
- tempIs.consume();
-
- while (tempIs.hasMoreParts()) {
- tempIs = new MimeBoundaryInputStream(is, bd.getBoundary());
- parseBodyPart(tempIs);
- tempIs.consume();
- if (tempIs.parentEOF()) {
- if (log.isWarnEnabled()) {
- log.warn("Line " + rootStream.getLineNumber()
- + ": Body part ended prematurely. "
- + "Higher level boundary detected or "
- + "EOF reached.");
- }
- break;
- }
- }
-
- handler.epilogue(new CloseShieldInputStream(is));
-
- handler.endMultipart();
-
- bodyDescriptors.removeFirst();
-
- } else if (bd.isMessage()) {
- if (bd.isBase64Encoded()) {
- log.warn("base64 encoded message/rfc822 detected");
- is = new EOLConvertingInputStream(
- new Base64InputStream(is));
- } else if (bd.isQuotedPrintableEncoded()) {
- log.warn("quoted-printable encoded message/rfc822 detected");
- is = new EOLConvertingInputStream(
- new QuotedPrintableInputStream(is));
- }
- bodyDescriptors.addFirst(bd);
- parseMessage(is);
- bodyDescriptors.removeFirst();
- } else {
- handler.body(bd, new CloseShieldInputStream(is));
- }
-
- /*
- * Make sure the stream has been consumed.
- */
- while (is.read() != -1) {
- }
- }
-
- private void parseMessage(InputStream is)
- throws MimeException, IOException {
- if (raw) {
- handler.raw(new CloseShieldInputStream(is));
- } else {
- handler.startMessage();
- parseEntity(is);
- handler.endMessage();
- }
- }
-
- private void parseBodyPart(InputStream is)
- throws MimeException, IOException {
- if (raw) {
- handler.raw(new CloseShieldInputStream(is));
- } else {
- handler.startBodyPart();
- parseEntity(is);
- handler.endBodyPart();
- }
- }
-
- /**
- * Parses a header.
- *
- * @param is the stream to parse.
- * @return a <code>BodyDescriptor</code> describing the body following
- * the header.
- * @throws MimeException if the header can not be processed
- * @throws IOException on I/O errors
- */
- private BodyDescriptor parseHeader(InputStream is)
- throws MimeException, IOException {
- BodyDescriptor bd = new BodyDescriptor(bodyDescriptors.isEmpty()
- ? null : (BodyDescriptor) bodyDescriptors.getFirst());
-
- handler.startHeader();
-
- int lineNumber = rootStream.getLineNumber();
-
- StringBuffer sb = new StringBuffer();
- int curr = 0;
- int prev = 0;
- while ((curr = is.read()) != -1) {
- if (curr == '\n' && (prev == '\n' || prev == 0)) {
- /*
- * [\r]\n[\r]\n or an immediate \r\n have been seen.
- */
- sb.deleteCharAt(sb.length() - 1);
- break;
- }
- sb.append((char) curr);
- prev = curr == '\r' ? prev : curr;
- }
-
- if (curr == -1 && log.isWarnEnabled()) {
- log.warn("Line " + rootStream.getLineNumber()
- + ": Unexpected end of headers detected. "
- + "Boundary detected in header or EOF reached.");
- }
-
- int start = 0;
- int pos = 0;
- int startLineNumber = lineNumber;
- while (pos < sb.length()) {
- while (pos < sb.length() && sb.charAt(pos) != '\r') {
- pos++;
- }
- if (pos < sb.length() - 1 && sb.charAt(pos + 1) != '\n') {
- pos++;
- continue;
- }
-
- if (pos >= sb.length() - 2 || fieldChars.get(sb.charAt(pos + 2))) {
-
- /*
- * field should be the complete field data excluding the
- * trailing \r\n.
- */
- String field = sb.substring(start, pos);
- start = pos + 2;
-
- /*
- * Check for a valid field.
- */
- int index = field.indexOf(':');
- boolean valid = false;
- if (index != -1 && fieldChars.get(field.charAt(0))) {
- valid = true;
- String fieldName = field.substring(0, index).trim();
- for (int i = 0; i < fieldName.length(); i++) {
- if (!fieldChars.get(fieldName.charAt(i))) {
- valid = false;
- break;
- }
- }
-
- if (valid) {
- handler.field(field);
- bd.addField(fieldName, field.substring(index + 1));
- }
- }
-
- if (!valid && log.isWarnEnabled()) {
- log.warn("Line " + startLineNumber
- + ": Ignoring invalid field: '" + field.trim() +
"'");
- }
-
- startLineNumber = lineNumber;
- }
-
- pos += 2;
- lineNumber++;
- }
-
- handler.endHeader();
-
- return bd;
+ mimeTokenStream.stop();
}
/**
Added:
james/mime4j/trunk/src/main/java/org/apache/james/mime4j/MimeTokenStream.java
URL:
http://svn.apache.org/viewvc/james/mime4j/trunk/src/main/java/org/apache/james/mime4j/MimeTokenStream.java?view=auto&rev=558847
==============================================================================
---
james/mime4j/trunk/src/main/java/org/apache/james/mime4j/MimeTokenStream.java
(added)
+++
james/mime4j/trunk/src/main/java/org/apache/james/mime4j/MimeTokenStream.java
Mon Jul 23 12:54:08 2007
@@ -0,0 +1,538 @@
+/****************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one *
+ * or more contributor license agreements. See the NOTICE file *
+ * distributed with this work for additional information *
+ * regarding copyright ownership. The ASF licenses this file *
+ * to you under the Apache License, Version 2.0 (the *
+ * "License"); you may not use this file except in compliance *
+ * with the License. You may obtain a copy of the License at *
+ * *
+ * http://www.apache.org/licenses/LICENSE-2.0 *
+ * *
+ * Unless required by applicable law or agreed to in writing, *
+ * software distributed under the License is distributed on an *
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY *
+ * KIND, either express or implied. See the License for the *
+ * specific language governing permissions and limitations *
+ * under the License. *
+ ****************************************************************/
+
+package org.apache.james.mime4j;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.BitSet;
+import java.util.List;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.james.mime4j.decoder.Base64InputStream;
+import org.apache.james.mime4j.decoder.QuotedPrintableInputStream;
+
+
+/**
+ * <p>
+ * Parses MIME (or RFC822) message streams of bytes or characters.
+ * The stream is converted into an event stream.
+ * </p>
+ * <p>
+ * Typical usage:
+ * </p>
+ * <pre>
+ * MimeTokenStream stream = new MimeTokenStream();
+ * stream.parse(new BufferedInputStream(new FileInputStream("mime.msg")));
+ * for (int state = stream.getState();
+ * state != MimeTokenStream.T_END_OF_STREAM;
+ * state = stream.next()) {
+ * switch (state) {
+ * case MimeTokenStream.T_BODY:
+ * System.out.println("Body detected, contents = "
+ * + stream.getInputStream() + ", header data = "
+ * + stream.getBodyDescriptor());
+ * break;
+ * case MimeTokenStream.T_FIELD:
+ * System.out.println("Header field detected: "
+ * + stream.getField());
+ * break;
+ * case MimeTokenStream.T_START_MULTIPART:
+ * System.out.println("Multipart message detected,"
+ * + " header data = "
+ * + stream.getBodyDescriptor());
+ * ...
+ * }
+ * }
+ * </pre>
+ * <p>
+ * <strong>NOTE:</strong> All lines must end with CRLF
+ * (<code>\r\n</code>). If you are unsure of the line endings in your stream
+ * you should wrap it in a {@link
org.apache.james.mime4j.EOLConvertingInputStream}
+ * instance.</p>
+ * <p>Instances of {@link MimeTokenStream} are reusable: Invoking
the
+ * method {@link #parse(InputStream)} resets the token stream's
internal
+ * state. However, they are definitely <em>not</em> thread safe. If you
+ * have a multi threaded application, then the suggested use is to have
+ * one instance per thread.</p>
+ *
+ * @version $Id: MimeStreamParser.java,v 1.8 2005/02/11 10:12:02 ntherning Exp
$
+ */
+public class MimeTokenStream {
+ private static final Log log = LogFactory.getLog(MimeStreamParser.class);
+
+ /**
+ * This token indicates, that the MIME stream has been completely
+ * and successfully parsed, and no more data is available.
+ */
+ public static final int T_END_OF_STREAM = -1;
+ /**
+ * This token indicates, that the MIME stream is currently
+ * at the beginning of a message.
+ */
+ public static final int T_START_MESSAGE = 0;
+ /**
+ * This token indicates, that the MIME stream is currently
+ * at the end of a message.
+ */
+ public static final int T_END_MESSAGE = 1;
+ /**
+ * This token indicates that a raw entity is currently being processed.
+ * You may call {@link #getInputStream()} to obtain the raw
entity
+ * data.
+ */
+ public static final int T_RAW_ENTITY = 2;
+ /**
+ * This token indicates that a message part's headers are now
+ * being parsed.
+ */
+ public static final int T_START_HEADER = 3;
+ /**
+ * This token indicates that a message part's field has now
+ * been parsed. You may call {@link #getField()} to obtain the
+ * raw field contents.
+ */
+ public static final int T_FIELD = 4;
+ /**
+ * This token indicates, that part headers have now been
+ * parsed.
+ */
+ public static final int T_END_HEADER = 5;
+ /**
+ * This token indicates, that a multipart body is being parsed.
+ */
+ public static final int T_START_MULTIPART = 6;
+ /**
+ * This token indicates, that a multipart body has been parsed.
+ */
+ public static final int T_END_MULTIPART = 7;
+ /**
+ * This token indicates that a multipart's preamble is being
+ * parsed. You may call {@link #getInputStream()} to access the
+ * preamble contents.
+ */
+ public static final int T_PREAMBLE = 8;
+ /**
+ * This token indicates that a multipart's epilogue is being
+ * parsed. You may call {@link #getInputStream()} to access the
+ * epilogue contents.
+ */
+ public static final int T_EPILOGUE = 9;
+ /**
+ * This token indicates, that the MIME stream is currently
+ * at the beginning of a body part.
+ */
+ public static final int T_START_BODYPART = 10;
+ /**
+ * This token indicates, that the MIME stream is currently
+ * at the end of a body part.
+ */
+ public static final int T_END_BODYPART = 11;
+ /**
+ * This token indicates that an atomic entity is being parsed.
+ * Use {@link #getInputStream()} to access the entity contents.
+ */
+ public static final int T_BODY = 12;
+ /**
+ * Internal state, not exposed.
+ */
+ private static final int T_IN_BODYPART = -2;
+ /**
+ * Internal state, not exposed.
+ */
+ private static final int T_IN_MESSAGE = -3;
+
+ private static final BitSet fieldChars = new BitSet();
+ static {
+ for (int i = 0x21; i <= 0x39; i++) {
+ fieldChars.set(i);
+ }
+ for (int i = 0x3b; i <= 0x7e; i++) {
+ fieldChars.set(i);
+ }
+ }
+
+ abstract static class StateMachine {
+ int state;
+ abstract int next() throws IOException, MimeException;
+ }
+
+ private static class RawEntity extends StateMachine {
+ private InputStream stream;
+ RawEntity(InputStream stream) {
+ this.stream = stream;
+ state = T_RAW_ENTITY;
+ }
+ int next() {
+ state = T_END_OF_STREAM;
+ return state;
+ }
+ }
+
+ private abstract class Entity extends StateMachine {
+ private final BodyDescriptor parent;
+ private final InputStream contents;
+ private final StringBuffer sb = new StringBuffer();
+ private BodyDescriptor body;
+ private int pos, start;
+ private int lineNumber, startLineNumber;
+ private final int endState;
+ private MimeBoundaryInputStream mbis;
+ InputStream stream;
+ String field;
+
+ Entity(InputStream contents, BodyDescriptor parent, int startState,
int endState) {
+ this.parent = parent;
+ this.contents = contents;
+ state = startState;
+ this.endState = endState;
+ }
+
+ private void setParsingFieldState() {
+ state = parseField() ? T_FIELD : T_END_HEADER;
+ }
+
+ private int setParseBodyPartState() throws IOException {
+ mbis.consume();
+ if (mbis.parentEOF()) {
+ if (log.isWarnEnabled()) {
+ log.warn("Line " + rootInputStream.getLineNumber()
+ + ": Body part ended prematurely. "
+ + "Higher level boundary detected or "
+ + "EOF reached.");
+ }
+ } else {
+ if (mbis.hasMoreParts()) {
+ mbis = new MimeBoundaryInputStream(contents,
body.getBoundary());
+ if (isRaw()) {
+ currentStateMachine = new RawEntity(mbis);
+ } else {
+ currentStateMachine = new BodyPart(mbis, body);
+ }
+ entities.add(currentStateMachine);
+ state = T_IN_BODYPART;
+ return currentStateMachine.state;
+ }
+ }
+ state = T_EPILOGUE;
+ stream = new CloseShieldInputStream(contents);
+ return T_EPILOGUE;
+ }
+
+ int next() throws IOException, MimeException {
+ switch (state) {
+ case T_START_MESSAGE:
+ case T_START_BODYPART:
+ state = T_START_HEADER;
+ break;
+ case T_START_HEADER:
+ initHeaderParsing();
+ setParsingFieldState();
+ break;
+ case T_FIELD:
+ setParsingFieldState();
+ break;
+ case T_END_HEADER:
+ if (body.isMultipart()) {
+ state = T_START_MULTIPART;
+ } else if (body.isMessage()) {
+ InputStream is = contents;
+ if (body.isBase64Encoded()) {
+ log.warn("base64 encoded message/rfc822 detected");
+ is = new EOLConvertingInputStream(new
Base64InputStream(contents));
+ } else if (body.isQuotedPrintableEncoded()) {
+ log.warn("quoted-printable encoded message/rfc822
detected");
+ is = new EOLConvertingInputStream(new
QuotedPrintableInputStream(contents));
+ }
+ state = endState;
+ return parseMessage(is, body);
+ } else {
+ stream = new CloseShieldInputStream(contents);
+ state = T_BODY;
+ break;
+ }
+ break;
+ case T_START_MULTIPART:
+ mbis = new MimeBoundaryInputStream(contents,
body.getBoundary());
+ stream = new CloseShieldInputStream(mbis);
+ state = T_PREAMBLE;
+ break;
+ case T_PREAMBLE:
+ return setParseBodyPartState();
+ case T_IN_BODYPART:
+ return setParseBodyPartState();
+ case T_EPILOGUE:
+ state = T_END_MULTIPART;
+ break;
+ case T_BODY:
+ case T_END_MULTIPART:
+ case T_IN_MESSAGE:
+ state = endState;
+ break;
+ default:
+ if (state == endState) {
+ state = T_END_OF_STREAM;
+ break;
+ }
+ throw new IllegalStateException("Invalid state: " + state);
+ }
+ return state;
+ }
+
+ private void initHeaderParsing() throws IOException {
+ body = new BodyDescriptor(parent);
+ startLineNumber = lineNumber = rootInputStream.getLineNumber();
+
+ int curr = 0;
+ int prev = 0;
+ while ((curr = contents.read()) != -1) {
+ if (curr == '\n' && (prev == '\n' || prev == 0)) {
+ /*
+ * [\r]\n[\r]\n or an immediate \r\n have been seen.
+ */
+ sb.deleteCharAt(sb.length() - 1);
+ break;
+ }
+ sb.append((char) curr);
+ prev = curr == '\r' ? prev : curr;
+ }
+
+ if (curr == -1 && log.isWarnEnabled()) {
+ log.warn("Line " + rootInputStream.getLineNumber()
+ + ": Unexpected end of headers detected. "
+ + "Boundary detected in header or EOF reached.");
+ }
+ }
+
+ private boolean parseField() {
+ while (pos < sb.length()) {
+ while (pos < sb.length() && sb.charAt(pos) != '\r') {
+ pos++;
+ }
+ if (pos < sb.length() - 1 && sb.charAt(pos + 1) != '\n') {
+ pos++;
+ continue;
+ }
+ if (pos >= sb.length() - 2 || fieldChars.get(sb.charAt(pos +
2))) {
+ /*
+ * field should be the complete field data excluding the
+ * trailing \r\n.
+ */
+ field = sb.substring(start, pos);
+ start = pos + 2;
+
+ /*
+ * Check for a valid field.
+ */
+ int index = field.indexOf(':');
+ boolean valid = false;
+ if (index != -1 && fieldChars.get(field.charAt(0))) {
+ valid = true;
+ String fieldName = field.substring(0, index).trim();
+ for (int i = 0; i < fieldName.length(); i++) {
+ if (!fieldChars.get(fieldName.charAt(i))) {
+ valid = false;
+ break;
+ }
+ }
+ if (valid) {
+ body.addField(fieldName, field.substring(index +
1));
+ startLineNumber = lineNumber;
+ pos += 2;
+ lineNumber++;
+ return true;
+ }
+ }
+ if (log.isWarnEnabled()) {
+ log.warn("Line " + startLineNumber
+ + ": Ignoring invalid field: '" + field.trim()
+ "'");
+ }
+ startLineNumber = lineNumber;
+ }
+ pos += 2;
+ lineNumber++;
+ }
+ return false;
+ }
+ }
+
+ private class Message extends Entity {
+ Message(InputStream contents, BodyDescriptor parent) {
+ super(contents, parent, T_START_MESSAGE, T_END_MESSAGE);
+ }
+ }
+
+ private class BodyPart extends Entity {
+ BodyPart(InputStream contents, BodyDescriptor parent) {
+ super(contents, parent, T_START_BODYPART, T_END_BODYPART);
+ }
+ }
+
+ private int state = T_END_OF_STREAM;
+ private RootInputStream rootInputStream;
+ private StateMachine currentStateMachine;
+ private final List entities = new ArrayList();
+ private boolean raw;
+
+ /** Instructs the {@link MimeTokenStream} to parse the given
stream's contents.
+ * If the {@link MimeTokenStream} has already been in use,
resets the stream's
+ * internal state.
+ */
+ public void parse(InputStream stream) {
+ entities.clear();
+ rootInputStream = new RootInputStream(stream);
+ state = parseMessage(rootInputStream, null);
+ }
+
+ private int parseMessage(InputStream pStream, BodyDescriptor parent) {
+ if (isRaw()) {
+ currentStateMachine = new RawEntity(pStream);
+ } else {
+ currentStateMachine = new Message(pStream, parent);
+ }
+ entities.add(currentStateMachine);
+ return currentStateMachine.state;
+ }
+
+ /**
+ * Determines if this parser is currently in raw mode.
+ *
+ * @return <code>true</code> if in raw mode, <code>false</code>
+ * otherwise.
+ * @see #setRaw(boolean)
+ */
+ public boolean isRaw() {
+ return raw;
+ }
+
+ /**
+ * Enables or disables raw mode. In raw mode all future entities
+ * (messages or body parts) in the stream will be reported to the
+ * {@link ContentHandler#raw(InputStream)} handler method only.
+ * The stream will contain the entire unparsed entity contents
+ * including header fields and whatever is in the body.
+ *
+ * @param raw <code>true</code> enables raw mode, <code>false</code>
+ * disables it.
+ */
+ public void setRaw(boolean raw) {
+ this.raw = raw;
+ }
+
+ /**
+ * Finishes the parsing and stops reading lines.
+ * NOTE: No more lines will be parsed but the parser
+ * will still call
+ * {@link ContentHandler#endMultipart()},
+ * {@link ContentHandler#endBodyPart()},
+ * {@link ContentHandler#endMessage()}, etc. to match previous
calls
+ * to
+ * {@link ContentHandler#startMultipart(BodyDescriptor)},
+ * {@link ContentHandler#startBodyPart()},
+ * {@link ContentHandler#startMessage()}, etc.
+ */
+ public void stop() {
+ rootInputStream.truncate();
+ }
+
+ /**
+ * Returns the current state.
+ */
+ public int getState() {
+ return state;
+ }
+
+ /**
+ * This method is valid, if {@link #getState()} returns {@link
#T_FIELD}.
+ * @return String with the field's raw contents.
+ * @throws IllegalStateException {@link #getState()} returns
another
+ * value than {@link #T_FIELD}.
+ */
+ public String getField() {
+ switch (getState()) {
+ case T_FIELD:
+ return ((Entity) currentStateMachine).field;
+ default:
+ throw new IllegalStateException("Expected state to be
T_FIELD.");
+ }
+ }
+
+ /**
+ * This method is valid, if {@link #getState()} returns either
of
+ * {@link #T_RAW_ENTITY}, {@link #T_PREAMBLE}, {@link #T_EPILOGUE}, or
{@link #T_BODY}.
+ * It returns the raw entity, preamble, epilogue, or body contents.
+ * @return Data stream, depending on the current state.
+ * @throws IllegalStateException {@link #getState()} returns an
+ * invalid value.
+ */
+ public InputStream getInputStream() {
+ switch (getState()) {
+ case T_RAW_ENTITY:
+ return ((RawEntity) currentStateMachine).stream;
+ case T_PREAMBLE:
+ case T_EPILOGUE:
+ case T_BODY:
+ return ((Entity) currentStateMachine).stream;
+ default:
+ throw new IllegalStateException("Expected state to be either
of T_RAW_ENTITY, T_PREAMBLE, or T_EPILOGUE.");
+ }
+ }
+
+ /**
+ * This method is valid, if {@link #getState()} returns
+ * {@link #T_BODY}, or {@link #T_START_MULTIPART}.
It returns the current
+ * entity's body descriptor.
+ */
+ public BodyDescriptor getBodyDescriptor() {
+ switch (getState()) {
+ case T_BODY:
+ case T_START_MULTIPART:
+ return ((Entity) currentStateMachine).body;
+ default:
+ throw new IllegalStateException("Expected state to be
T_BODY.");
+ }
+ }
+
+ /**
+ * This method advances the token stream to the next token.
+ * @throws IllegalStateException The method has been called, although
+ * {@link #getState()} was already {@link
#T_END_OF_STREAM}.
+ */
+ public int next() throws IOException, MimeException {
+ if (state == T_END_OF_STREAM || currentStateMachine == null) {
+ throw new IllegalStateException("No more tokens are available.");
+ }
+ while (currentStateMachine != null) {
+ state = currentStateMachine.next();
+ if (state != T_END_OF_STREAM) {
+ return state;
+ }
+ entities.remove(entities.size()-1);
+ if (entities.size() == 0) {
+ currentStateMachine = null;
+ } else {
+ currentStateMachine = (StateMachine)
entities.get(entities.size()-1);
+ }
+ }
+ state = T_END_OF_STREAM;
+ return state;
+ }
+}
Added: james/mime4j/trunk/src/site/apt/usage.apt
URL:
http://svn.apache.org/viewvc/james/mime4j/trunk/src/site/apt/usage.apt?view=auto&rev=558847
==============================================================================
--- james/mime4j/trunk/src/site/apt/usage.apt (added)
+++ james/mime4j/trunk/src/site/apt/usage.apt Mon Jul 23 12:54:08 2007
@@ -0,0 +1,191 @@
+
+~~ Licensed to the Apache Software Foundation (ASF) under one
+~~ or more contributor license agreements. See the NOTICE file
+~~ distributed with this work for additional information
+~~ regarding copyright ownership. The ASF licenses this file
+~~ to you under the Apache License, Version 2.0 (the
+~~ "License"); you may not use this file except in compliance
+~~ with the License. You may obtain a copy of the License at
+~~
+~~ http://www.apache.org/licenses/LICENSE-2.0
+~~
+~~ Unless required by applicable law or agreed to in writing,
+~~ software distributed under the License is distributed on an
+~~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+~~ KIND, either express or implied. See the License for the
+~~ specific language governing permissions and limitations
+~~ under the License.
+
+ -------------
+ Usage
+ -------------
+
+{Usage}
+
+ Mime4j provides two different APIs: an event-based API, available through
+ the {{{apidocs/org/apache/james/mime4j/MimeStreamParser.html}
+ MimeStreamParser}}. Alternatively, you may use the iterative
+ API, which is available through the
+ {{{apidocs/org/apache/james/mime4j/MimeTokenStream.html}
+ MimeTokenStream}}. In terms of speed, you should not notice
+ any differences.
+
+ * {{{#Token Streams}Token Streams}}
+
+ * {{{#Sample Token Stream}Sample Token Stream}}
+
+ * {{{#Event Handlers}Event Handlers}}
+
+ * {{{#Sample Event Stream}Sample Event Stream}}
+
+{Token Streams}
+
+ The iterative approach is using the class
+ {{{apidocs/org/apache/james/mime4j/MimeTokenStream.html}
+ MimeTokenStream}}. Here's an example, how you could use
+ the token stream:
+
+--------------------------------------------------------------------
+ MimeTokenStream stream = new MimeTokenStream();
+ stream.parse(new BufferedInputStream(new FileInputStream("mime.msg")));
+ for (int state = stream.getState();
+ state != MimeTokenStream.T_END_OF_STREAM;
+ state = stream.next()) {
+ switch (state) {
+ case MimeTokenStream.T_BODY:
+ System.out.println("Body detected, contents = "
+ + stream.getInputStream() + ", header data = "
+ + stream.getBodyDescriptor());
+ break;
+ case MimeTokenStream.T_FIELD:
+ System.out.println("Header field detected: "
+ + stream.getField());
+ break;
+ case MimeTokenStream.T_START_MULTIPART:
+ System.out.println("Multipart message detected,"
+ + " header data = "
+ + stream.getBodyDescriptor());
+ ...
+ }
+ }
+--------------------------------------------------------------------
+
+ The token stream provides a sequence of tokens. Tokens are identified
+ by a state. Most states are simply event indicators, with no
+ additional data available. However, some states
+ provide additional data. For example, the state
+ <<<T_BODY>>> indicates that an actual body is available.
+ If you encounter this state, then you may ask for the body's contents,
+ which are provided through the <<<getInputStream()>>> method,
+ or you might ask for the header data by invoking
+ <<<getBodyDescriptor()>>>.
+
+{Sample Token Stream}
+
+ The following sample should give you a rough idea of the order,
+ in which you'll receive tokens:
+
+--------------------------------------------------------------------
+ T_START_MESSAGE
+ T_START_HEADER
+ T_FIELD
+ T_FIELD
+ ...
+ T_END_HEADER
+ T_START_MULTIPART
+ T_PREAMBLE
+ T_START_BODYPART
+ T_START_HEADER
+ T_FIELD
+ T_FIELD
+ ...
+ T_END_HEADER
+ T_BODY
+ T_END_BODYPART
+ T_START_BODYPART
+ T_START_HEADER
+ T_FIELD
+ T_FIELD
+ ...
+ T_END_HEADER
+ T_BODY
+ T_END_BODYPART
+ T_EPILOGUE
+ T_END_MULTIPART
+ T_END_MESSAGE
+--------------------------------------------------------------------
+
+ The example shows a multipart message with two parts.
+
+{Event Handlers}
+
+ The event based API requires, that you provide an event handler,
+ which receives events. The event handler is an object, which
+ implements the {{{apidocs/org/apache/james/mime4j/ContentHandler.html}
+ ContentHandler}} interface. Here's an example, how you could
+ implement an event handler:
+
+--------------------------------------------------------------------
+ public class MyContentHandler implements org.apache.james.mime4j.ContentHandler
{
+ public void body(BodyDescriptor bd, InputStream is)
+ throws MimeException, IOException {
+ System.out.println("Body detected, contents = "
+ + is + ", header data = " + bd);
+ }
+ public void field(String fieldData) throws MimeException {
+ System.out.println("Header field detected: "
+ + fieldData);
+ }
+ public void startMultipart(BodyDescriptor bd) throws MimeException {
+ System.out.println("Multipart message detected, header data = "
+ + bd);
+ }
+ ...
+ }
+--------------------------------------------------------------------
+
+ A little bit of additional code allows us to create an example, which
+ is functionally equivalent to the example from the section on
+ {{{#Token Streams}Token Streams}}:
+
+--------------------------------------------------------------------
+ ContentHandler handler = new MyContentHandler();
+ MimeStreamParser parser = new MimeStreamParser();
+ parser.setContentHandler(handler);
+ parser.parse(new BufferedInputStream(new FileInputStream("mime.msg")));
+--------------------------------------------------------------------
+
+{Sample Event Stream}
+
+ Like above for tokens, we provide an additional example, which
+ demonstrates the typical order of events that you have to expect:
+
+--------------------------------------------------------------------
+ startMessage()
+ startHeader()
+ field(...)
+ field(...)
+ ...
+ endHeader()
+ startMultipart()
+ preamble(...)
+ startBodyPart()
+ startHeader()
+ field(...)
+ field(...)
+ ...
+ endHeader()
+ body()
+ endBodyPart()
+ startBodyPart()
+ startHeader()
+ field(...)
+ field(...)
+ ...
+ endHeader()
+ body()
+ endBodyPart()
+ epilogue(...)
+ endMultipart()
+ endMessage()
+--------------------------------------------------------------------
Modified: james/mime4j/trunk/src/site/site.xml
URL:
http://svn.apache.org/viewvc/james/mime4j/trunk/src/site/site.xml?view=diff&rev=558847&r1=558846&r2=558847
==============================================================================
--- james/mime4j/trunk/src/site/site.xml (original)
+++ james/mime4j/trunk/src/site/site.xml Mon Jul 23 12:54:08 2007
@@ -32,7 +32,8 @@
<menu name="mime4j">
<item name="Overview" href="/index.html"/>
<item name="News and Status" href="/status.html"/>
- <item name="Example" href="/samples.html"/>
+ <item name="Example" href="/samples.html"/>
+ <item name="Usage" href="/usage.html"/>
<item name="Start" href="/start/index.html" collapse="true">
<item name="Download" href="/start/download.html" collapse="true"/>
<item name="Build" href="/start/build.html" collapse="true"/>
Modified:
james/mime4j/trunk/src/test/java/org/apache/james/mime4j/MimeStreamParserTest.java
URL:
http://svn.apache.org/viewvc/james/mime4j/trunk/src/test/java/org/apache/james/mime4j/MimeStreamParserTest.java?view=diff&rev=558847&r1=558846&r2=558847
==============================================================================
---
james/mime4j/trunk/src/test/java/org/apache/james/mime4j/MimeStreamParserTest.java
(original)
+++
james/mime4j/trunk/src/test/java/org/apache/james/mime4j/MimeStreamParserTest.java
Mon Jul 23 12:54:08 2007
@@ -438,7 +438,6 @@
try {
String expected = IOUtils.toString(new
FileInputStream(xmlFile), "ISO8859-1");
-
assertEquals("Error parsing " + f.getName(), expected,
result);
} catch (FileNotFoundException e) {
FileOutputStream fos = new FileOutputStream(xmlFileMime4j);
---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]