Modified: tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/iptc/IptcAnpaParser.java
URL: http://svn.apache.org/viewvc/tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/iptc/IptcAnpaParser.java?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
--- tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/iptc/IptcAnpaParser.java (original)
+++ tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/iptc/IptcAnpaParser.java Sun Aug 31 19:36:36 2014
@@ -23,6 +23,7 @@ import java.text.SimpleDateFormat;
 import java.util.Collections;
 import java.util.Date;
 import java.util.HashMap;
+import java.util.Locale;
 import java.util.Set;
 import java.util.TimeZone;
 
@@ -160,7 +161,7 @@ public class IptcAnpaParser implements P
          }
          int msgsize = is.read(buf);                // read in at least the full data
 
-         String message = (new String(buf)).toLowerCase();
+         String message = (new String(buf, "UTF-8")).toLowerCase(Locale.ROOT);
          // these are not if-then-else, because we want to go from most common
          // and fall through to least.  this is imperfect, as these tags could
          // show up in other agency stories, but i can't find a spec or any
@@ -590,7 +591,7 @@ public class IptcAnpaParser implements P
                      --read;
                   }
                }
-               if (tmp_line.toLowerCase().startsWith("by") || longline.equals("bdy_author")) {
+               if (tmp_line.toLowerCase(Locale.ROOT).startsWith("by") || longline.equals("bdy_author")) {
                   longkey = "bdy_author";
 
                   // prepend a space to subsequent line, so it gets parsed consistent with the lead line
@@ -608,7 +609,7 @@ public class IptcAnpaParser implements P
                }
                else if (FORMAT == this.FMT_IPTC_BLM) {
                   String byline = "   by ";
-                  if (tmp_line.toLowerCase().contains(byline)) {
+                  if (tmp_line.toLowerCase(Locale.ROOT).contains(byline)) {
                      longkey = "bdy_author";
 
                      int term = tmp_line.length();
@@ -617,11 +618,11 @@ public class IptcAnpaParser implements P
                      term = Math.min(term, (tmp_line.contains("\n") ? tmp_line.indexOf("\n") : term));
                      term = (term > 0 ) ? term : tmp_line.length();
                      // for bloomberg, the author line sits below their copyright statement
-                     bdy_author += tmp_line.substring(tmp_line.toLowerCase().indexOf(byline) + byline.length(), term) + " ";
+                     bdy_author += tmp_line.substring(tmp_line.toLowerCase(Locale.ROOT).indexOf(byline) + byline.length(), term) + " ";
                      metastarted = true;
                      longline = ((tmp_line.contains("=")) && (!longline.equals(longkey)) ? longkey : "");
                   }
-                  else if(tmp_line.toLowerCase().startsWith("c.")) {
+                  else if(tmp_line.toLowerCase(Locale.ROOT).startsWith("c.")) {
                      // the author line for bloomberg is a multiline starting with c.2011 Bloomberg News
                      // then containing the author info on the next line
                      if (val_next == TB) {
@@ -629,7 +630,7 @@ public class IptcAnpaParser implements P
                         continue;
                      }
                   }
-                  else if(tmp_line.toLowerCase().trim().startsWith("(") && tmp_line.toLowerCase().trim().endsWith(")")) {
+                  else if(tmp_line.toLowerCase(Locale.ROOT).trim().startsWith("(") && tmp_line.toLowerCase(Locale.ROOT).trim().endsWith(")")) {
                      // the author line may have one or more comment lines between the copyright
                      // statement, and the By AUTHORNAME line
                      if (val_next == TB) {
@@ -639,7 +640,7 @@ public class IptcAnpaParser implements P
                   }
                }
 
-               else if (tmp_line.toLowerCase().startsWith("eds") || longline.equals("bdy_source")) {
+               else if (tmp_line.toLowerCase(Locale.ROOT).startsWith("eds") || longline.equals("bdy_source")) {
                   longkey = "bdy_source";
                   // prepend a space to subsequent line, so it gets parsed consistent with the lead line
                   tmp_line = (longline.equals(longkey) ? " " : "") + tmp_line;
@@ -736,14 +737,14 @@ public class IptcAnpaParser implements P
                   // standard reuters format
                   format_in = "HH:mm MM-dd-yy";
                }
-               SimpleDateFormat dfi =   new SimpleDateFormat(format_in);
+               SimpleDateFormat dfi = new SimpleDateFormat(format_in, Locale.ROOT);
                dfi.setTimeZone(TimeZone.getTimeZone("UTC"));
                dateunix = dfi.parse(ftr_datetime);
             }
             catch (ParseException ep) {
                // failed, but this will just fall through to setting the date to now
             }
-            SimpleDateFormat dfo =   new SimpleDateFormat(format_out);
+            SimpleDateFormat dfo = new SimpleDateFormat(format_out, Locale.ROOT);
             dfo.setTimeZone(TimeZone.getTimeZone("UTC"));
             ftr_datetime = dfo.format(dateunix);
          }

Modified: tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/iwork/AutoPageNumberUtils.java
URL: http://svn.apache.org/viewvc/tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/iwork/AutoPageNumberUtils.java?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
--- tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/iwork/AutoPageNumberUtils.java (original)
+++ tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/iwork/AutoPageNumberUtils.java Sun Aug 31 19:36:36 2014
@@ -16,6 +16,8 @@
  */
 package org.apache.tika.parser.iwork;
 
+import java.util.Locale;
+
 /**
  * Utility class to allow for conversion from an integer to Roman numerals
  * or alpha-numeric symbols in line with Pages auto numbering formats.
@@ -44,7 +46,7 @@ package org.apache.tika.parser.iwork;
        }
        
        public static String asAlphaNumericLower(int i) {
-               return asAlphaNumeric(i).toLowerCase();
+               return asAlphaNumeric(i).toLowerCase(Locale.ROOT);
        }
        
        /*
@@ -73,7 +75,7 @@ package org.apache.tika.parser.iwork;
     }
     
        public static String asRomanNumeralsLower(int i) {
-               return asRomanNumerals(i).toLowerCase();
+               return asRomanNumerals(i).toLowerCase(Locale.ROOT);
        }
     
     private static int i2r(StringBuffer sbuff, int i,

Modified: tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/mail/MailContentHandler.java
URL: http://svn.apache.org/viewvc/tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/mail/MailContentHandler.java?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
--- tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/mail/MailContentHandler.java (original)
+++ tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/mail/MailContentHandler.java Sun Aug 31 19:36:36 2014
@@ -26,7 +26,11 @@ import org.apache.james.mime4j.dom.addre
 import org.apache.james.mime4j.dom.address.AddressList;
 import org.apache.james.mime4j.dom.address.Mailbox;
 import org.apache.james.mime4j.dom.address.MailboxList;
-import org.apache.james.mime4j.dom.field.*;
+import org.apache.james.mime4j.dom.field.AddressListField;
+import org.apache.james.mime4j.dom.field.DateTimeField;
+import org.apache.james.mime4j.dom.field.MailboxListField;
+import org.apache.james.mime4j.dom.field.ParsedField;
+import org.apache.james.mime4j.dom.field.UnstructuredField;
 import org.apache.james.mime4j.field.LenientFieldParser;
 import org.apache.james.mime4j.parser.ContentHandler;
 import org.apache.james.mime4j.stream.BodyDescriptor;
@@ -141,8 +145,7 @@ class MailContentHandler implements Cont
     /**
      * Header for the whole message or its parts
      * 
-     * @see http 
-     *      ://james.apache.org/mime4j/apidocs/org/apache/james/mime4j/parser/
+     * @see http://james.apache.org/mime4j/apidocs/org/apache/james/mime4j/parser/
      *      Field.html
      **/
     public void field(Field field) throws MimeException {

Modified: tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/mat/MatParser.java
URL: http://svn.apache.org/viewvc/tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/mat/MatParser.java?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
--- tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/mat/MatParser.java (original)
+++ tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/mat/MatParser.java Sun Aug 31 19:36:36 2014
@@ -86,7 +86,7 @@ public class MatParser extends AbstractP
             }
 
             // Get endian indicator from header file
-            String endianBytes = new String(hdr.getEndianIndicator()); // Retrieve endian bytes and convert to string
+            String endianBytes = new String(hdr.getEndianIndicator(), "UTF-8"); // Retrieve endian bytes and convert to string
             String endianCode = String.valueOf(endianBytes.toCharArray()); // Convert bytes to characters to string
             metadata.set("endian", endianCode);
 

Modified: tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/mbox/MboxParser.java
URL: http://svn.apache.org/viewvc/tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/mbox/MboxParser.java?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
--- tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/mbox/MboxParser.java (original)
+++ tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/mbox/MboxParser.java Sun Aug 31 19:36:36 2014
@@ -167,7 +167,7 @@ public class MboxParser extends Abstract
       return; // ignore malformed header lines
     }
 
-    String headerTag = headerMatcher.group(1).toLowerCase();
+    String headerTag = headerMatcher.group(1).toLowerCase(Locale.ROOT);
     String headerContent = headerMatcher.group(2);
 
     if (headerTag.equalsIgnoreCase("From")) {

Modified: tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/mbox/OutlookPSTParser.java
URL: http://svn.apache.org/viewvc/tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/mbox/OutlookPSTParser.java?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
--- tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/mbox/OutlookPSTParser.java (original)
+++ tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/mbox/OutlookPSTParser.java Sun Aug 31 19:36:36 2014
@@ -140,7 +140,7 @@ public class OutlookPSTParser extends Ab
     mailMetadata.set("priority", valueOf(pstMail.getPriority()));
     mailMetadata.set("flagged", valueOf(pstMail.isFlagged()));
 
-    byte[] mailContent = pstMail.getBody().getBytes();
+    byte[] mailContent = pstMail.getBody().getBytes("UTF-8");
     embeddedExtractor.parseEmbedded(new ByteArrayInputStream(mailContent), handler, mailMetadata, true);
   }
 

Modified: tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/HSLFExtractor.java
URL: http://svn.apache.org/viewvc/tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/HSLFExtractor.java?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
--- tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/HSLFExtractor.java (original)
+++ tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/HSLFExtractor.java Sun Aug 31 19:36:36 2014
@@ -20,7 +20,18 @@ import java.io.IOException;
 import java.util.HashSet;
 
 import org.apache.poi.hslf.HSLFSlideShow;
-import org.apache.poi.hslf.model.*;
+import org.apache.poi.hslf.model.Comment;
+import org.apache.poi.hslf.model.HeadersFooters;
+import org.apache.poi.hslf.model.MasterSheet;
+import org.apache.poi.hslf.model.Notes;
+import org.apache.poi.hslf.model.OLEShape;
+import org.apache.poi.hslf.model.Picture;
+import org.apache.poi.hslf.model.Shape;
+import org.apache.poi.hslf.model.Slide;
+import org.apache.poi.hslf.model.Table;
+import org.apache.poi.hslf.model.TableCell;
+import org.apache.poi.hslf.model.TextRun;
+import org.apache.poi.hslf.model.TextShape;
 import org.apache.poi.hslf.usermodel.ObjectData;
 import org.apache.poi.hslf.usermodel.PictureData;
 import org.apache.poi.hslf.usermodel.SlideShow;

Modified: tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java
URL: http://svn.apache.org/viewvc/tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
--- tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java (original)
+++ tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java Sun Aug 31 19:36:36 2014
@@ -20,6 +20,7 @@ import java.io.ByteArrayInputStream;
 import java.io.IOException;
 import java.text.ParseException;
 import java.util.Date;
+import java.util.Locale;
 
 import org.apache.poi.hmef.attribute.MAPIRtfAttribute;
 import org.apache.poi.hsmf.MAPIMessage;
@@ -126,7 +127,7 @@ public class OutlookExtractor extends Ab
                  String[] headers = msg.getHeaders();
                  if(headers != null && headers.length > 0) {
                      for(String header: headers) {
-                        if(header.toLowerCase().startsWith("date:")) {
+                        if(header.toLowerCase(Locale.ROOT).startsWith("date:")) {
                             String date = header.substring(header.indexOf(':')+1).trim();
                             
                             // See if we can parse it as a normal mail date

Modified: tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/WordExtractor.java
URL: http://svn.apache.org/viewvc/tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/WordExtractor.java?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
--- tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/WordExtractor.java (original)
+++ tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/WordExtractor.java Sun Aug 31 19:36:36 2014
@@ -22,6 +22,7 @@ import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.List;
+import java.util.Locale;
 import java.util.Map;
 import java.util.Set;
 
@@ -233,7 +234,7 @@ public class WordExtractor extends Abstr
           CharacterRun cr = p.getCharacterRun(j);
 
           // FIELD_BEGIN_MARK:
-          if (cr.text().getBytes()[0] == 0x13) {
+          if (cr.text().getBytes("UTF-8")[0] == 0x13) {
              Field field = document.getFields().getFieldByStartOffset(docPart, cr.getStartOffset());
              // 58 is an embedded document
              // 56 is a document link
@@ -548,7 +549,7 @@ public class WordExtractor extends Abstr
            tag = "h" + Math.min(num, 6);
        } else {
            styleClass = styleName.replace(' ', '_');
-           styleClass = styleClass.substring(0,1).toLowerCase() +
+           styleClass = styleClass.substring(0,1).toLowerCase(Locale.ROOT) +
                styleClass.substring(1);
        }
 

Modified: tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/AbstractOOXMLExtractor.java
URL: http://svn.apache.org/viewvc/tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/AbstractOOXMLExtractor.java?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
--- tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/AbstractOOXMLExtractor.java (original)
+++ tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/AbstractOOXMLExtractor.java Sun Aug 31 19:36:36 2014
@@ -217,11 +217,10 @@ public abstract class AbstractOOXMLExtra
     private void handleEmbeddedOLE(PackagePart part, ContentHandler handler, String rel)
             throws IOException, SAXException {
         // A POIFSFileSystem needs to be at least 3 blocks big to be valid
-        // TODO: TIKA-1118 Upgrade to POI 4.0 then enable this block of code
-//        if (part.getSize() >= 0 && part.getSize() < 512*3) {
-//           // Too small, skip
-//           return;
-//        }
+        if (part.getSize() >= 0 && part.getSize() < 512*3) {
+           // Too small, skip
+           return;
+        }
        
         // Open the POIFS (OLE2) structure and process
         POIFSFileSystem fs = new POIFSFileSystem(part.getInputStream());

Modified: tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/mp3/CompositeTagHandler.java
URL: http://svn.apache.org/viewvc/tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/mp3/CompositeTagHandler.java?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
--- tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/mp3/CompositeTagHandler.java (original)
+++ tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/mp3/CompositeTagHandler.java Sun Aug 31 19:36:36 2014
@@ -113,4 +113,30 @@ public class CompositeTagHandler impleme
         return null;
     }
 
+    public String getAlbumArtist() {
+        for (ID3Tags tag : tags) {
+            if (tag.getAlbumArtist() != null) {
+                return tag.getAlbumArtist();
+            }
+        }
+        return null;
+    }
+
+    public String getDisc() {
+        for (ID3Tags tag : tags) {
+            if (tag.getDisc() != null) {
+                return tag.getDisc();
+            }
+        }
+        return null;
+    }
+
+    public String getCompilation() {
+        for (ID3Tags tag : tags) {
+            if (tag.getCompilation() != null) {
+                return tag.getCompilation();
+            }
+        }
+        return null;
+    }
 }

Modified: tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3Tags.java
URL: http://svn.apache.org/viewvc/tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3Tags.java?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
--- tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3Tags.java (original)
+++ tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3Tags.java Sun Aug 31 19:36:36 2014
@@ -18,7 +18,6 @@ package org.apache.tika.parser.mp3;
 
 import java.util.List;
 
-
 /**
  * Interface that defines the common interface for ID3 tag parsers,
  *  such as ID3v1 and ID3v2.3.
@@ -172,12 +171,22 @@ public interface ID3Tags {
 
     String getTitle();
 
+    /**
+     * The Artist for the track
+     */
     String getArtist();
 
+    /**
+     * The Artist for the overall album / compilation of albums
+     */
+    String getAlbumArtist();
+
     String getAlbum();
     
     String getComposer();
 
+    String getCompilation();
+    
     /**
      * Retrieves the comments, if any.
      * Files may have more than one comment, but normally only 
@@ -189,9 +198,17 @@ public interface ID3Tags {
 
     String getYear();
 
+    /**
+     * The number of the track within the album / recording
+     */
     String getTrackNumber();
 
     /**
+     * The number of the disc this belongs to, within the set
+     */
+    String getDisc();
+
+    /**
      * Represents a comments in ID3 (especially ID3 v2), where are 
      *  made up of several parts
      */

Modified: tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3v1Handler.java
URL: http://svn.apache.org/viewvc/tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3v1Handler.java?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
--- tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3v1Handler.java (original)
+++ tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3v1Handler.java Sun Aug 31 19:36:36 2014
@@ -121,6 +121,30 @@ public class ID3v1Handler implements ID3
     }
 
     /**
+     * ID3v1 doesn't have album-wide artists,
+     *  so returns null;
+     */
+    public String getAlbumArtist() {
+        return null;
+    }
+
+    /**
+     * ID3v1 doesn't have disc numbers,
+     *  so returns null;
+     */
+    public String getDisc() {
+        return null;
+    }
+
+    /**
+     * ID3v1 doesn't have compilations,
+     *  so returns null;
+     */
+    public String getCompilation() {
+        return null;
+    }
+
+    /**
      * Returns the identified ISO-8859-1 substring from the given byte buffer.
      * The return value is the zero-terminated substring retrieved from
      * between the given start and end positions in the given byte buffer.

Modified: tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3v22Handler.java
URL: http://svn.apache.org/viewvc/tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3v22Handler.java?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
--- tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3v22Handler.java (original)
+++ tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3v22Handler.java Sun Aug 31 19:36:36 2014
@@ -39,6 +39,8 @@ public class ID3v22Handler implements ID
     private String composer;
     private String genre;
     private String trackNumber;
+    private String albumArtist;
+    private String disc;
     private List<ID3Comment> comments = new ArrayList<ID3Comment>();
 
     public ID3v22Handler(ID3v2Frame frame)
@@ -50,6 +52,8 @@ public class ID3v22Handler implements ID
                 title = getTagString(tag.data, 0, tag.data.length); 
             } else if (tag.name.equals("TP1")) {
                 artist = getTagString(tag.data, 0, tag.data.length); 
+            } else if (tag.name.equals("TP2")) {
+                albumArtist = getTagString(tag.data, 0, tag.data.length); 
             } else if (tag.name.equals("TAL")) {
                 album = getTagString(tag.data, 0, tag.data.length); 
             } else if (tag.name.equals("TYE")) {
@@ -60,6 +64,8 @@ public class ID3v22Handler implements ID
                 comments.add( getComment(tag.data, 0, tag.data.length) ); 
             } else if (tag.name.equals("TRK")) {
                 trackNumber = getTagString(tag.data, 0, tag.data.length); 
+            } else if (tag.name.equals("TPA")) {
+                disc = getTagString(tag.data, 0, tag.data.length); 
             } else if (tag.name.equals("TCO")) {
                 genre = extractGenre( getTagString(tag.data, 0, tag.data.length) );
             }
@@ -129,10 +135,25 @@ public class ID3v22Handler implements ID
         return trackNumber;
     }
 
+    public String getAlbumArtist() {
+        return albumArtist;
+    }
+
+    public String getDisc() {
+        return disc;
+    }
+
+    /**
+     * ID3v22 doesn't have compilations,
+     *  so returns null;
+     */
+    public String getCompilation() {
+        return null;
+    }
+
     private class RawV22TagIterator extends RawTagIterator {
         private RawV22TagIterator(ID3v2Frame frame) {
             frame.super(3, 3, 1, 0);
         }
     }
-
 }

Modified: tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3v23Handler.java
URL: http://svn.apache.org/viewvc/tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3v23Handler.java?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
--- tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3v23Handler.java (original)
+++ tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3v23Handler.java Sun Aug 31 19:36:36 2014
@@ -39,6 +39,9 @@ public class ID3v23Handler implements ID
     private String composer;
     private String genre;
     private String trackNumber;
+    private String albumArtist;
+    private String disc;
+    private String compilation;
     private List<ID3Comment> comments = new ArrayList<ID3Comment>();
 
     public ID3v23Handler(ID3v2Frame frame)
@@ -50,6 +53,8 @@ public class ID3v23Handler implements ID
                 title = getTagString(tag.data, 0, tag.data.length); 
             } else if (tag.name.equals("TPE1")) {
                 artist = getTagString(tag.data, 0, tag.data.length); 
+            } else if (tag.name.equals("TPE2")) {
+                albumArtist = getTagString(tag.data, 0, tag.data.length); 
             } else if (tag.name.equals("TALB")) {
                 album = getTagString(tag.data, 0, tag.data.length); 
             } else if (tag.name.equals("TYER")) {
@@ -60,6 +65,10 @@ public class ID3v23Handler implements ID
                 comments.add( getComment(tag.data, 0, tag.data.length) ); 
             } else if (tag.name.equals("TRCK")) {
                 trackNumber = getTagString(tag.data, 0, tag.data.length); 
+            } else if (tag.name.equals("TPOS")) {
+                disc = getTagString(tag.data, 0, tag.data.length); 
+            } else if (tag.name.equals("TCMP")) {
+                compilation = getTagString(tag.data, 0, tag.data.length); 
             } else if (tag.name.equals("TCON")) {
                 genre = ID3v22Handler.extractGenre( getTagString(tag.data, 0, tag.data.length) );
             }
@@ -109,10 +118,21 @@ public class ID3v23Handler implements ID
         return trackNumber;
     }
 
+    public String getAlbumArtist() {
+        return albumArtist;
+    }
+
+    public String getDisc() {
+        return disc;
+    }
+
+    public String getCompilation() {
+        return compilation;
+    }
+
     private class RawV23TagIterator extends RawTagIterator {
         private RawV23TagIterator(ID3v2Frame frame) {
             frame.super(4, 4, 1, 2);
         }
     }
-
 }

Modified: tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3v24Handler.java
URL: http://svn.apache.org/viewvc/tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3v24Handler.java?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
--- tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3v24Handler.java (original)
+++ tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ID3v24Handler.java Sun Aug 31 19:36:36 2014
@@ -40,6 +40,9 @@ public class ID3v24Handler implements ID
     private String composer;
     private String genre;
     private String trackNumber;
+    private String albumArtist;
+    private String disc;
+    private String compilation;
     private List<ID3Comment> comments = new ArrayList<ID3Comment>();
 
     public ID3v24Handler(ID3v2Frame frame)
@@ -51,6 +54,8 @@ public class ID3v24Handler implements ID
                 title = getTagString(tag.data, 0, tag.data.length); 
             } else if (tag.name.equals("TPE1")) {
                 artist = getTagString(tag.data, 0, tag.data.length); 
+            } else if (tag.name.equals("TPE2")) {
+                albumArtist = getTagString(tag.data, 0, tag.data.length); 
             } else if (tag.name.equals("TALB")) {
                 album = getTagString(tag.data, 0, tag.data.length); 
             } else if (tag.name.equals("TYER")) {
@@ -65,6 +70,10 @@ public class ID3v24Handler implements ID
                 comments.add( getComment(tag.data, 0, tag.data.length) ); 
             } else if (tag.name.equals("TRCK")) {
                 trackNumber = getTagString(tag.data, 0, tag.data.length); 
+            } else if (tag.name.equals("TPOS")) {
+                disc = getTagString(tag.data, 0, tag.data.length); 
+            } else if (tag.name.equals("TCMP")) {
+                compilation = getTagString(tag.data, 0, tag.data.length); 
             } else if (tag.name.equals("TCON")) {
                genre = ID3v22Handler.extractGenre( getTagString(tag.data, 0, tag.data.length) );
             }
@@ -114,10 +123,21 @@ public class ID3v24Handler implements ID
         return trackNumber;
     }
 
+    public String getAlbumArtist() {
+        return albumArtist;
+    }
+
+    public String getDisc() {
+        return disc;
+    }
+
+    public String getCompilation() {
+        return compilation;
+    }
+
     private class RawV24TagIterator extends RawTagIterator {
         private RawV24TagIterator(ID3v2Frame frame) {
             frame.super(4, 4, 1, 2);
         }
     }
-
 }

Modified: tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/mp3/LyricsHandler.java
URL: http://svn.apache.org/viewvc/tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/mp3/LyricsHandler.java?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
--- tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/mp3/LyricsHandler.java (original)
+++ tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/mp3/LyricsHandler.java Sun Aug 31 19:36:36 2014
@@ -82,7 +82,7 @@ public class LyricsHandler {
             //  size including the LYRICSBEGIN but excluding the 
             //  length+LYRICS200 at the end.
             int length = Integer.parseInt(
-                    new String(tagData, lookat-6, 6)
+                    new String(tagData, lookat-6, 6, "UTF-8")
             );
 
             String lyrics = new String(

Modified: tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/mp3/Mp3Parser.java
URL: http://svn.apache.org/viewvc/tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/mp3/Mp3Parser.java?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
--- tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/mp3/Mp3Parser.java (original)
+++ tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/mp3/Mp3Parser.java Sun Aug 31 19:36:36 2014
@@ -76,8 +76,10 @@ public class Mp3Parser extends AbstractP
            metadata.set(TikaCoreProperties.TITLE, tag.getTitle());
            metadata.set(TikaCoreProperties.CREATOR, tag.getArtist());
            metadata.set(XMPDM.ARTIST, tag.getArtist());
+           metadata.set(XMPDM.ALBUM_ARTIST, tag.getAlbumArtist());
            metadata.set(XMPDM.COMPOSER, tag.getComposer());
            metadata.set(XMPDM.ALBUM, tag.getAlbum());
+           metadata.set(XMPDM.COMPILATION, tag.getCompilation());
            metadata.set(XMPDM.RELEASE_DATE, tag.getYear());
            metadata.set(XMPDM.GENRE, tag.getGenre());
            metadata.set(XMPDM.DURATION, audioAndTags.duration);
@@ -107,12 +109,18 @@ public class Mp3Parser extends AbstractP
            xhtml.element("p", tag.getArtist());
 
             // ID3v1.1 Track addition
+            StringBuilder sb = new StringBuilder();
+            sb.append(tag.getAlbum());
             if (tag.getTrackNumber() != null) {
-                xhtml.element("p", tag.getAlbum() + ", track " + tag.getTrackNumber());
+                sb.append(", track ").append(tag.getTrackNumber());
                 metadata.set(XMPDM.TRACK_NUMBER, tag.getTrackNumber());
-            } else {
-                xhtml.element("p", tag.getAlbum());
             }
+            if (tag.getDisc() != null) {
+                sb.append(", disc ").append(tag.getDisc());
+                metadata.set(XMPDM.DISC_NUMBER, tag.getDisc());
+            }
+            xhtml.element("p", sb.toString());
+            
             xhtml.element("p", tag.getYear());
             xhtml.element("p", tag.getGenre());
             xhtml.element("p", String.valueOf(audioAndTags.duration));

Modified: 
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/mp4/MP4Parser.java
URL: 
http://svn.apache.org/viewvc/tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/mp4/MP4Parser.java?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
--- 
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/mp4/MP4Parser.java
 (original)
+++ 
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/mp4/MP4Parser.java
 Sun Aug 31 19:36:36 2014
@@ -31,6 +31,7 @@ import org.apache.tika.io.TikaInputStrea
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.metadata.Property;
 import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.metadata.XMP;
 import org.apache.tika.metadata.XMPDM;
 import org.apache.tika.mime.MediaType;
 import org.apache.tika.parser.AbstractParser;
@@ -55,7 +56,10 @@ import com.coremedia.iso.boxes.apple.App
 import com.coremedia.iso.boxes.sampleentry.AudioSampleEntry;
 import com.googlecode.mp4parser.boxes.apple.AppleAlbumBox;
 import com.googlecode.mp4parser.boxes.apple.AppleArtistBox;
+import com.googlecode.mp4parser.boxes.apple.AppleArtist2Box;
 import com.googlecode.mp4parser.boxes.apple.AppleCommentBox;
+import com.googlecode.mp4parser.boxes.apple.AppleCompilationBox;
+import com.googlecode.mp4parser.boxes.apple.AppleDiskNumberBox;
 import com.googlecode.mp4parser.boxes.apple.AppleEncoderBox;
 import com.googlecode.mp4parser.boxes.apple.AppleGenreBox;
 import com.googlecode.mp4parser.boxes.apple.AppleNameBox;
@@ -217,6 +221,10 @@ public class MP4Parser extends AbstractP
                   addMetadata(TikaCoreProperties.CREATOR, metadata, artist);
                   addMetadata(XMPDM.ARTIST, metadata, artist);
 
+                  // Album Artist
+                  AppleArtist2Box artist2 = getOrNull(apple, AppleArtist2Box.class);
+                  addMetadata(XMPDM.ALBUM_ARTIST, metadata, artist2);
+
                   // Album
                   AppleAlbumBox album = getOrNull(apple, AppleAlbumBox.class);
                   addMetadata(XMPDM.ALBUM, metadata, album);
@@ -242,13 +250,27 @@ public class MP4Parser extends AbstractP
                      //metadata.set(XMPDM.NUMBER_OF_TRACKS, trackNum.getB()); // TODO
                   }
 
+                  // Disc number
+                  AppleDiskNumberBox discNum = getOrNull(apple, AppleDiskNumberBox.class);
+                  if (discNum != null) {
+                     metadata.set(XMPDM.DISC_NUMBER, discNum.getA());
+                  }
+
+                  // Compilation
+                  AppleCompilationBox compilation = getOrNull(apple, AppleCompilationBox.class);
+                  if (compilation != null) {
+                      metadata.set(XMPDM.COMPILATION, (int)compilation.getValue());
+                  }
+
                   // Comment
                   AppleCommentBox comment = getOrNull(apple, AppleCommentBox.class);
                   addMetadata(XMPDM.LOG_COMMENT, metadata, comment);
 
                   // Encoder
                   AppleEncoderBox encoder = getOrNull(apple, AppleEncoderBox.class);
-                  // addMetadata(XMPDM.???, metadata, encoder); // TODO
+                  if (encoder != null) {
+                      metadata.set(XMP.CREATOR_TOOL, encoder.getValue());
+                  }
 
 
                   // As text

Modified: 
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/odf/NSNormalizerContentHandler.java
URL: 
http://svn.apache.org/viewvc/tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/odf/NSNormalizerContentHandler.java?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
--- 
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/odf/NSNormalizerContentHandler.java
 (original)
+++ 
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/odf/NSNormalizerContentHandler.java
 Sun Aug 31 19:36:36 2014
@@ -18,6 +18,7 @@ package org.apache.tika.parser.odf;
 
 import java.io.IOException;
 import java.io.StringReader;
+import java.util.Locale;
 
 import org.apache.tika.sax.ContentHandlerDecorator;
 import org.xml.sax.Attributes;
@@ -87,7 +88,7 @@ public class NSNormalizerContentHandler 
     @Override
     public InputSource resolveEntity(String publicId, String systemId)
             throws IOException, SAXException {
-        if ((systemId != null && systemId.toLowerCase().endsWith(".dtd"))
+        if ((systemId != null && systemId.toLowerCase(Locale.ROOT).endsWith(".dtd"))
                 || DTD_PUBLIC_ID.equals(publicId)) {
             return new InputSource(new StringReader(""));
         } else {

Modified: 
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java
URL: 
http://svn.apache.org/viewvc/tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
--- 
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java
 (original)
+++ 
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java
 Sun Aug 31 19:36:36 2014
@@ -25,6 +25,7 @@ import java.util.Calendar;
 import java.util.HashSet;
 import java.util.List;
 import java.util.ListIterator;
+import java.util.Locale;
 import java.util.Map;
 import java.util.Set;
 import java.util.TreeMap;
@@ -81,9 +82,10 @@ import org.xml.sax.helpers.AttributesImp
 class PDF2XHTML extends PDFTextStripper {
     
     /**
-     * format used for signature dates
+     * Format used for signature dates
+     * TODO Make this thread-safe
      */
-    private final SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ");
+    private final SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ", Locale.ROOT);
  
     /**
      * Maximum recursive depth during AcroForm processing.

Modified: 
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java
URL: 
http://svn.apache.org/viewvc/tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
--- 
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java
 (original)
+++ 
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java
 Sun Aug 31 19:36:36 2014
@@ -22,6 +22,7 @@ import java.util.Arrays;
 import java.util.Calendar;
 import java.util.Collections;
 import java.util.List;
+import java.util.Locale;
 import java.util.Set;
 
 import org.apache.jempbox.xmp.XMPSchema;
@@ -204,7 +205,7 @@ public class PDFParser extends AbstractP
             // Invalid date format, just ignore
         }
         try {
-            Calendar modified = info.getModificationDate(); 
+            Calendar modified = info.getModificationDate();
             addMetadata(metadata, Metadata.LAST_MODIFIED, modified);
             addMetadata(metadata, TikaCoreProperties.MODIFIED, modified);
         } catch (IOException e) {
@@ -214,7 +215,7 @@ public class PDFParser extends AbstractP
         // All remaining metadata is custom
         // Copy this over as-is
         List<String> handledMetadata = Arrays.asList("Author", "Creator", "CreationDate", "ModDate",
-             "Keywords", "Producer", "Subject", "Title", "Trapped");
+                "Keywords", "Producer", "Subject", "Title", "Trapped");
         for(COSName key : info.getDictionary().keySet()) {
             String name = key.getName();
             if(! handledMetadata.contains(name)) {
@@ -241,7 +242,7 @@ public class PDFParser extends AbstractP
                     metadata.set("pdfaid:part", Integer.toString(pdfaxmp.getPart()));
                     if (pdfaxmp.getConformance() != null) {
                         metadata.set("pdfaid:conformance", pdfaxmp.getConformance());
-                        String version = "A-"+pdfaxmp.getPart()+pdfaxmp.getConformance().toLowerCase();
+                        String version = "A-"+pdfaxmp.getPart()+pdfaxmp.getConformance().toLowerCase(Locale.ROOT);
                         metadata.set("pdfa:PDFVersion", version );
                         metadata.add(TikaCoreProperties.FORMAT.getName(), 
                             MEDIA_TYPE.toString()+"; version=\""+version+"\"" );

Modified: 
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParserConfig.java
URL: 
http://svn.apache.org/viewvc/tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParserConfig.java?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
--- 
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParserConfig.java
 (original)
+++ 
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParserConfig.java
 Sun Aug 31 19:36:36 2014
@@ -20,6 +20,7 @@ package org.apache.tika.parser.pdf;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.Serializable;
+import java.util.Locale;
 import java.util.Properties;
 
 import org.apache.pdfbox.util.PDFTextStripper;
@@ -337,9 +338,9 @@ public class PDFParserConfig implements 
         if (p == null){
             return defaultMissing;
         }
-        if (p.toLowerCase().equals("true")) {
+        if (p.toLowerCase(Locale.ROOT).equals("true")) {
             return true;
-        } else if (p.toLowerCase().equals("false")) {
+        } else if (p.toLowerCase(Locale.ROOT).equals("false")) {
             return false;
         } else {
             return defaultMissing;

Modified: 
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/pkg/PackageParser.java
URL: 
http://svn.apache.org/viewvc/tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/pkg/PackageParser.java?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
--- 
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/pkg/PackageParser.java
 (original)
+++ 
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/pkg/PackageParser.java
 Sun Aug 31 19:36:36 2014
@@ -67,7 +67,6 @@ public class PackageParser extends Abstr
     private static final MediaType CPIO = MediaType.application("x-cpio");
     private static final MediaType DUMP = MediaType.application("x-tika-unix-dump");
     private static final MediaType TAR = MediaType.application("x-tar");
-    // Enable this when COMPRESS-267 is fixed, see TIKA-1243
     private static final MediaType SEVENZ = MediaType.application("x-7z-compressed");
 
     private static final Set<MediaType> SUPPORTED_TYPES =
@@ -127,7 +126,7 @@ public class PackageParser extends Abstr
                 stream.reset();
                 TikaInputStream tstream = TikaInputStream.get(stream);
                 
-                // Pending a fix for COMPRESS_269, this bit is a little nasty
+                // Pending a fix for COMPRESS-269, this bit is a little nasty
                 ais = new SevenZWrapper(new SevenZFile(tstream.getFile()));
             } else {
                 throw new TikaException("Unknown non-streaming format " + sne.getFormat(), sne);

Modified: 
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java
URL: 
http://svn.apache.org/viewvc/tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
--- 
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java
 (original)
+++ 
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java
 Sun Aug 31 19:36:36 2014
@@ -22,6 +22,7 @@ import java.io.InputStream;
 import java.util.Enumeration;
 import java.util.HashSet;
 import java.util.Iterator;
+import java.util.Locale;
 import java.util.Set;
 import java.util.regex.Pattern;
 
@@ -245,11 +246,11 @@ public class ZipContainerDetector implem
         String docType = coreType.substring(0, coreType.lastIndexOf('.'));
 
         // The Macro Enabled formats are a little special
-        if(docType.toLowerCase().endsWith("macroenabled")) {
-            docType = docType.toLowerCase() + ".12";
+        if(docType.toLowerCase(Locale.ROOT).endsWith("macroenabled")) {
+            docType = docType.toLowerCase(Locale.ROOT) + ".12";
         }
 
-        if(docType.toLowerCase().endsWith("macroenabledtemplate")) {
+        if(docType.toLowerCase(Locale.ROOT).endsWith("macroenabledtemplate")) {
             docType = MACRO_TEMPLATE_PATTERN.matcher(docType).replaceAll("macroenabled.12");
         }
 

Modified: 
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFObjDataParser.java
URL: 
http://svn.apache.org/viewvc/tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFObjDataParser.java?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
--- 
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFObjDataParser.java
 (original)
+++ 
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFObjDataParser.java
 Sun Aug 31 19:36:36 2014
@@ -24,6 +24,7 @@ import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.UnsupportedEncodingException;
+import java.util.Locale;
 import java.util.concurrent.atomic.AtomicInteger;
 
 import org.apache.poi.poifs.filesystem.DirectoryNode;
@@ -102,9 +103,9 @@ class RTFObjDataParser {
         //readBytes tests for reading too many bytes
         byte[] embObjBytes = readBytes(is, dataSz);
 
-        if (className.toLowerCase().equals("package")){
+        if (className.toLowerCase(Locale.ROOT).equals("package")){
             return handlePackage(embObjBytes, metadata);
-        } else if (className.toLowerCase().equals("pbrush")) {
+        } else if (className.toLowerCase(Locale.ROOT).equals("pbrush")) {
             //simple bitmap bytes
             return embObjBytes;
         } else {

Modified: 
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/rtf/TextExtractor.java
URL: 
http://svn.apache.org/viewvc/tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/rtf/TextExtractor.java?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
--- 
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/rtf/TextExtractor.java
 (original)
+++ 
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/rtf/TextExtractor.java
 Sun Aug 31 19:36:36 2014
@@ -29,7 +29,9 @@ import java.nio.charset.CodingErrorActio
 import java.util.Calendar;
 import java.util.HashMap;
 import java.util.LinkedList;
+import java.util.Locale;
 import java.util.Map;
+import java.util.TimeZone;
 
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.metadata.Metadata;
@@ -1339,7 +1341,7 @@ final class TextExtractor {
         if (inHeader) {
             if (nextMetaData != null) {
                 if (nextMetaData == TikaCoreProperties.CREATED) {
-                    Calendar cal = Calendar.getInstance();
+                    Calendar cal = Calendar.getInstance(TimeZone.getDefault(), Locale.ROOT);
                     cal.set(year, month-1, day, hour, minute, 0);
                     metadata.set(nextMetaData, cal.getTime());
                 } else if (nextMetaData.isMultiValuePermitted()) {

Modified: 
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/video/FLVParser.java
URL: 
http://svn.apache.org/viewvc/tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/video/FLVParser.java?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
--- 
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/video/FLVParser.java
 (original)
+++ 
tika/branches/1.6/tika-parsers/src/main/java/org/apache/tika/parser/video/FLVParser.java
 Sun Aug 31 19:36:36 2014
@@ -130,7 +130,7 @@ public class FLVParser extends AbstractP
         int size = input.readUnsignedShort();
         byte[] chars = new byte[size];
         input.readFully(chars);
-        return new String(chars);
+        return new String(chars, "UTF-8");
     }
 
     private Object readAMFObject(DataInputStream input) throws IOException {

Modified: 
tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/TestParsers.java
URL: 
http://svn.apache.org/viewvc/tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/TestParsers.java?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
--- 
tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/TestParsers.java 
(original)
+++ 
tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/TestParsers.java 
Sun Aug 31 19:36:36 2014
@@ -108,7 +108,8 @@ public class TestParsers extends TikaTes
 
     @Test
     public void testComment() throws Exception {
-        final String[] extensions = new String[] {"ppt", "pptx", "doc", "docx", "pdf", "rtf"};
+        final String[] extensions = new String[] {"ppt", "pptx", "doc", 
+            "docx", "xls", "xlsx", "pdf", "rtf"};
         for(String extension : extensions) {
             verifyComment(extension, "testComment");
         }

Modified: 
tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/TikaTest.java
URL: 
http://svn.apache.org/viewvc/tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/TikaTest.java?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
--- tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/TikaTest.java 
(original)
+++ tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/TikaTest.java 
Sun Aug 31 19:36:36 2014
@@ -16,6 +16,7 @@
  */
 package org.apache.tika;
 
+import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
 
@@ -88,6 +89,10 @@ public abstract class TikaTest {
       assertTrue(needle + " not found in:\n" + haystack, haystack.contains(needle));
     }
 
+    public static void assertNotContained(String needle, String haystack) {
+        assertFalse(needle + " unexpectedly found in:\n" + haystack, haystack.contains(needle));
+     }
+
     protected static class XMLResult {
         public final String xml;
         public final Metadata metadata;

Modified: 
tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/embedder/ExternalEmbedderTest.java
URL: 
http://svn.apache.org/viewvc/tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/embedder/ExternalEmbedderTest.java?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
--- 
tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/embedder/ExternalEmbedderTest.java
 (original)
+++ 
tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/embedder/ExternalEmbedderTest.java
 Sun Aug 31 19:36:36 2014
@@ -34,6 +34,7 @@ import java.text.DateFormat;
 import java.text.SimpleDateFormat;
 import java.util.Date;
 import java.util.HashMap;
+import java.util.Locale;
 import java.util.Map;
 
 import org.apache.tika.embedder.Embedder;
@@ -58,7 +59,7 @@ import org.xml.sax.SAXException;
 public class ExternalEmbedderTest {
 
     protected static final DateFormat EXPECTED_METADATA_DATE_FORMATTER =
-            new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss");
+            new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss", Locale.ROOT);
     protected static final String DEFAULT_CHARSET = "UTF-8";
     private static final String COMMAND_METADATA_ARGUMENT_DESCRIPTION = "dc:description";
     private static final String TEST_TXT_PATH = "/test-documents/testTXT.txt";

Modified: 
tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java
URL: 
http://svn.apache.org/viewvc/tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
--- 
tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java
 (original)
+++ 
tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java
 Sun Aug 31 19:36:36 2014
@@ -728,6 +728,12 @@ public class TestMimeTypes {
     }
     
     @Test
+    public void testAxCrypt() throws Exception {
+        // test-TXT.txt encrypted with a key of "tika"
+        assertTypeDetection("testTXT-tika.axx", "application/x-axcrypt");
+    }
+    
+    @Test
     public void testMatroskaDetection() throws Exception {
         assertType("video/x-matroska", "testMKV.mkv");
         // TODO: Need custom detector data detection, see TIKA-1180

Modified: 
tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java
URL: 
http://svn.apache.org/viewvc/tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
--- 
tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java
 (original)
+++ 
tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java
 Sun Aug 31 19:36:36 2014
@@ -389,7 +389,7 @@ public class AutoDetectParserTest {
     public void testSpecificParserList() throws Exception {
         AutoDetectParser parser = new AutoDetectParser(new MyDetector(), new MyParser());
         
-        InputStream is = new ByteArrayInputStream("test".getBytes());
+        InputStream is = new ByteArrayInputStream("test".getBytes("UTF-8"));
         Metadata metadata = new Metadata();
         parser.parse(is, new BodyContentHandler(), metadata, new ParseContext());
         

Modified: 
tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/parser/chm/TestChmBlockInfo.java
URL: 
http://svn.apache.org/viewvc/tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/parser/chm/TestChmBlockInfo.java?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
--- 
tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/parser/chm/TestChmBlockInfo.java
 (original)
+++ 
tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/parser/chm/TestChmBlockInfo.java
 Sun Aug 31 19:36:36 2014
@@ -69,7 +69,7 @@ public class TestChmBlockInfo {
         int indexOfControlData = chmDirListCont.getControlDataIndex();
 
         int indexOfResetTable = ChmCommons.indexOfResetTableBlock(data,
-                ChmConstants.LZXC.getBytes());
+                ChmConstants.LZXC.getBytes("UTF-8"));
         byte[] dir_chunk = null;
         if (indexOfResetTable > 0) {
             // dir_chunk = Arrays.copyOfRange( data, indexOfResetTable,

Modified: 
tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/parser/chm/TestChmItspHeader.java
URL: 
http://svn.apache.org/viewvc/tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/parser/chm/TestChmItspHeader.java?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
--- 
tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/parser/chm/TestChmItspHeader.java
 (original)
+++ 
tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/parser/chm/TestChmItspHeader.java
 Sun Aug 31 19:36:36 2014
@@ -27,6 +27,8 @@ import org.junit.After;
 import org.junit.Before;
 import org.junit.Test;
 
+import java.io.UnsupportedEncodingException;
+
 /**
  * Tests all public methods of the ChmItspHeader
  * 
@@ -134,9 +136,9 @@ public class TestChmItspHeader {
     }
 
     @Test
-    public void testGetSignature() {
+    public void testGetSignature() throws UnsupportedEncodingException {
         assertEquals(TestParameters.VP_ISTP_SIGNATURE, new String(
-                chmItspHeader.getSignature()));
+                chmItspHeader.getSignature(), "UTF-8"));
     }
 
     @Test

Modified: 
tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/parser/chm/TestChmLzxState.java
URL: 
http://svn.apache.org/viewvc/tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/parser/chm/TestChmLzxState.java?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
--- 
tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/parser/chm/TestChmLzxState.java
 (original)
+++ 
tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/parser/chm/TestChmLzxState.java
 Sun Aug 31 19:36:36 2014
@@ -64,7 +64,7 @@ public class TestChmLzxState {
                 ChmConstants.CONTROL_DATA);
 
         int indexOfResetTable = ChmCommons.indexOfResetTableBlock(data,
-                ChmConstants.LZXC.getBytes());
+                ChmConstants.LZXC.getBytes("UTF-8"));
         byte[] dir_chunk = null;
         if (indexOfResetTable > 0) {
             // dir_chunk = Arrays.copyOfRange( data, indexOfResetTable,

Modified: 
tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/parser/chm/TestChmLzxcControlData.java
URL: 
http://svn.apache.org/viewvc/tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/parser/chm/TestChmLzxcControlData.java?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
--- 
tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/parser/chm/TestChmLzxcControlData.java
 (original)
+++ 
tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/parser/chm/TestChmLzxcControlData.java
 Sun Aug 31 19:36:36 2014
@@ -29,6 +29,8 @@ import org.apache.tika.parser.chm.core.C
 import org.junit.Before;
 import org.junit.Test;
 
+import java.io.UnsupportedEncodingException;
+
 /**
  * Tests all public methods of ChmLzxcControlData block
  */
@@ -60,7 +62,7 @@ public class TestChmLzxcControlData {
         int indexOfControlData = chmDirListCont.getControlDataIndex();
 
         int indexOfResetTable = ChmCommons.indexOfResetTableBlock(data,
-                ChmConstants.LZXC.getBytes());
+                ChmConstants.LZXC.getBytes("UTF-8"));
         byte[] dir_chunk = null;
         if (indexOfResetTable > 0) {
             // dir_chunk = Arrays.copyOfRange( data, indexOfResetTable,
@@ -127,16 +129,16 @@ public class TestChmLzxcControlData {
     }
 
     @Test
-    public void testGetSignature() {
+    public void testGetSignature() throws UnsupportedEncodingException {
         assertEquals(
-                TestParameters.VP_CONTROL_DATA_SIGNATURE.getBytes().length,
+                TestParameters.VP_CONTROL_DATA_SIGNATURE.getBytes("UTF-8").length,
                 chmLzxcControlData.getSignature().length);
     }
 
     @Test
-    public void testGetSignaure() {
+    public void testGetSignaure() throws UnsupportedEncodingException {
         assertEquals(
-                TestParameters.VP_CONTROL_DATA_SIGNATURE.getBytes().length,
+                TestParameters.VP_CONTROL_DATA_SIGNATURE.getBytes("UTF-8").length,
                 chmLzxcControlData.getSignature().length);
     }
 

Modified: 
tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/parser/chm/TestChmLzxcResetTable.java
URL: 
http://svn.apache.org/viewvc/tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/parser/chm/TestChmLzxcResetTable.java?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
--- 
tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/parser/chm/TestChmLzxcResetTable.java
 (original)
+++ 
tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/parser/chm/TestChmLzxcResetTable.java
 Sun Aug 31 19:36:36 2014
@@ -59,7 +59,7 @@ public class TestChmLzxcResetTable {
         int indexOfControlData = chmDirListCont.getControlDataIndex();
 
         int indexOfResetTable = ChmCommons.indexOfResetTableBlock(data,
-                ChmConstants.LZXC.getBytes());
+                ChmConstants.LZXC.getBytes("UTF-8"));
         byte[] dir_chunk = null;
         if (indexOfResetTable > 0) {
             // dir_chunk = Arrays.copyOfRange( data, indexOfResetTable,

Modified: 
tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/parser/chm/TestPmglHeader.java
URL: 
http://svn.apache.org/viewvc/tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/parser/chm/TestPmglHeader.java?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
--- 
tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/parser/chm/TestPmglHeader.java
 (original)
+++ 
tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/parser/chm/TestPmglHeader.java
 Sun Aug 31 19:36:36 2014
@@ -25,6 +25,8 @@ import org.apache.tika.parser.chm.core.C
 import org.junit.Before;
 import org.junit.Test;
 
+import java.io.UnsupportedEncodingException;
+
 public class TestPmglHeader {
     ChmPmglHeader chmPmglHeader = null;
 
@@ -44,9 +46,9 @@ public class TestPmglHeader {
     }
 
     @Test
-    public void testChmPmglHeaderGet() {
+    public void testChmPmglHeaderGet() throws UnsupportedEncodingException {
         assertEquals(TestParameters.VP_PMGL_SIGNATURE, new String(
-                chmPmglHeader.getSignature()));
+                chmPmglHeader.getSignature(), "UTF-8"));
     }
 
     @Test

Modified: 
tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/parser/code/SourceCodeParserTest.java
URL: 
http://svn.apache.org/viewvc/tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/parser/code/SourceCodeParserTest.java?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
--- 
tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/parser/code/SourceCodeParserTest.java
 (original)
+++ 
tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/parser/code/SourceCodeParserTest.java
 Sun Aug 31 19:36:36 2014
@@ -62,7 +62,7 @@ public class SourceCodeParserTest extend
     assertTrue(textContent.length() > 0);
     assertTrue(textContent.indexOf("html") < 0);
     
-    textContent = getText(new ByteArrayInputStream("public class HelloWorld {}".getBytes()), sourceCodeParser, createMetadata("text/x-java-source"));
+    textContent = getText(new ByteArrayInputStream("public class HelloWorld {}".getBytes("UTF-8")), sourceCodeParser, createMetadata("text/x-java-source"));
     assertTrue(textContent.length() > 0);
     assertTrue(textContent.indexOf("html") < 0);
   }

Modified: 
tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/parser/font/FontParsersTest.java
URL: 
http://svn.apache.org/viewvc/tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/parser/font/FontParsersTest.java?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
--- 
tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/parser/font/FontParsersTest.java
 (original)
+++ 
tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/parser/font/FontParsersTest.java
 Sun Aug 31 19:36:36 2014
@@ -31,7 +31,13 @@ import org.xml.sax.ContentHandler;
 import org.apache.tika.io.TikaInputStream;
 import org.junit.Test;
 
-import static org.apache.tika.parser.font.AdobeFontMetricParser.*;
+import static org.apache.tika.parser.font.AdobeFontMetricParser.MET_FONT_NAME;
+import static org.apache.tika.parser.font.AdobeFontMetricParser.MET_FONT_FULL_NAME;
+import static org.apache.tika.parser.font.AdobeFontMetricParser.MET_FONT_FAMILY_NAME;
+import static org.apache.tika.parser.font.AdobeFontMetricParser.MET_FONT_WEIGHT;
+import static org.apache.tika.parser.font.AdobeFontMetricParser.MET_FONT_VERSION;
+import static org.apache.tika.parser.font.AdobeFontMetricParser.MET_FONT_SUB_FAMILY_NAME;
+import static org.apache.tika.parser.font.AdobeFontMetricParser.MET_PS_NAME;
 
 /**
  * Test case for parsing various different font files.

Modified: 
tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/parser/image/ImageMetadataExtractorTest.java
URL: 
http://svn.apache.org/viewvc/tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/parser/image/ImageMetadataExtractorTest.java?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
--- 
tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/parser/image/ImageMetadataExtractorTest.java
 (original)
+++ 
tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/parser/image/ImageMetadataExtractorTest.java
 Sun Aug 31 19:36:36 2014
@@ -16,10 +16,6 @@
  */
 package org.apache.tika.parser.image;
 
-import java.util.Arrays;
-import java.util.GregorianCalendar;
-import java.util.Iterator;
-import java.util.List;
 
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.metadata.TikaCoreProperties;
@@ -32,11 +28,20 @@ import com.drew.metadata.exif.ExifIFD0Di
 import com.drew.metadata.exif.ExifSubIFDDirectory;
 import com.drew.metadata.jpeg.JpegCommentDirectory;
 
+import java.util.Arrays;
+import java.util.GregorianCalendar;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Locale;
+import java.util.TimeZone;
+
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertNull;
 import static org.junit.Assert.assertTrue;
-import static org.mockito.Mockito.*;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.when;
 
 public class ImageMetadataExtractorTest {
     
@@ -57,7 +62,7 @@ public class ImageMetadataExtractorTest 
         verify(handler1).supports(JpegCommentDirectory.class);
         verify(handler1).handle(directory, metadata);
     }
-    
+
     @Test
     public void testExifHandlerSupports() {
         assertTrue(new ImageMetadataExtractor.ExifHandler().supports(ExifIFD0Directory.class));
@@ -70,8 +75,11 @@ public class ImageMetadataExtractorTest 
     public void testExifHandlerParseDate() throws MetadataException {
         ExifSubIFDDirectory exif = mock(ExifSubIFDDirectory.class);
         when(exif.containsTag(ExifSubIFDDirectory.TAG_DATETIME_ORIGINAL)).thenReturn(true);
+        GregorianCalendar calendar = new GregorianCalendar(TimeZone.getDefault(), Locale.ROOT);
+        calendar.setTimeInMillis(0);
+        calendar.set(2000, 0, 1, 0, 0, 0);
         when(exif.getDate(ExifSubIFDDirectory.TAG_DATETIME_ORIGINAL)).thenReturn(
-                new GregorianCalendar(2000, 0, 1, 0, 0, 0).getTime()); // jvm default timezone as in Metadata Extractor
+                calendar.getTime()); // jvm default timezone as in Metadata Extractor
         Metadata metadata = new Metadata();
         
         new ImageMetadataExtractor.ExifHandler().handle(exif, metadata);
@@ -83,8 +91,11 @@ public class ImageMetadataExtractorTest 
     public void testExifHandlerParseDateFallback() throws MetadataException {
         ExifIFD0Directory exif = mock(ExifIFD0Directory.class);
         when(exif.containsTag(ExifIFD0Directory.TAG_DATETIME)).thenReturn(true);
+        GregorianCalendar calendar = new GregorianCalendar(TimeZone.getDefault(), Locale.ROOT);
+        calendar.setTimeInMillis(0);
+        calendar.set(1999, 0, 1, 0, 0, 0);
         when(exif.getDate(ExifIFD0Directory.TAG_DATETIME)).thenReturn(
-                new GregorianCalendar(1999, 0, 1, 0, 0, 0).getTime()); // jvm default timezone as in Metadata Extractor
+                calendar.getTime()); // jvm default timezone as in Metadata Extractor
         Metadata metadata = new Metadata();
         
         new ImageMetadataExtractor.ExifHandler().handle(exif, metadata);

Modified: 
tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ExcelParserTest.java
URL: 
http://svn.apache.org/viewvc/tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ExcelParserTest.java?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
--- 
tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ExcelParserTest.java
 (original)
+++ 
tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ExcelParserTest.java
 Sun Aug 31 19:36:36 2014
@@ -17,8 +17,9 @@
 package org.apache.tika.parser.microsoft;
 
 import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertTrue;
+import static org.apache.tika.TikaTest.assertContains;
+import static org.apache.tika.TikaTest.assertNotContained;
 
 import java.io.InputStream;
 import java.util.Locale;
@@ -65,13 +66,13 @@ public class ExcelParserTest {
             assertEquals("2007-10-01T16:31:43Z", metadata.get(Metadata.DATE));
             
             String content = handler.toString();
-            assertTrue(content.contains("Sample Excel Worksheet"));
-            assertTrue(content.contains("Numbers and their Squares"));
-            assertTrue(content.contains("\t\tNumber\tSquare"));
-            assertTrue(content.contains("9"));
-            assertFalse(content.contains("9.0"));
-            assertTrue(content.contains("196"));
-            assertFalse(content.contains("196.0"));
+            assertContains("Sample Excel Worksheet", content);
+            assertContains("Numbers and their Squares", content);
+            assertContains("\t\tNumber\tSquare", content);
+            assertContains("9", content);
+            assertNotContained("9.0", content);
+            assertContains("196", content);
+            assertNotContained("196.0", content);
         } finally {
             input.close();
         }
@@ -95,12 +96,12 @@ public class ExcelParserTest {
             String content = handler.toString();
 
             // Number #,##0.00
-            assertTrue(content.contains("1,599.99"));
-            assertTrue(content.contains("-1,599.99"));
+            assertContains("1,599.99", content);
+            assertContains("-1,599.99", content);
 
             // Currency $#,##0.00;[Red]($#,##0.00)
-            assertTrue(content.contains("$1,599.99"));
-            assertTrue(content.contains("($1,599.99)"));
+            assertContains("$1,599.99", content);
+            assertContains("($1,599.99)", content);
 
             // Scientific 0.00E+00
             // poi <=3.8beta1 returns 1.98E08, newer versions return 1.98+E08
@@ -108,26 +109,29 @@ public class ExcelParserTest {
             assertTrue(content.contains("-1.98E08") || content.contains("-1.98E+08"));
 
             // Percentage.
-            assertTrue(content.contains("2.50%"));
+            assertContains("2.50%", content);
             // Excel rounds up to 3%, but that requires Java 1.6 or later
             if(System.getProperty("java.version").startsWith("1.5")) {
-                assertTrue(content.contains("2%"));
+                assertContains("2%", content);
             } else {
-                assertTrue(content.contains("3%"));
+                assertContains("3%", content);
             }
 
             // Time Format: h:mm
-            assertTrue(content.contains("6:15"));
-            assertTrue(content.contains("18:15"));
+            assertContains("6:15", content);
+            assertContains("18:15", content);
 
             // Date Format: d-mmm-yy
-            assertTrue(content.contains("17-May-07"));
+            assertContains("17-May-07", content);
 
             // Date Format: m/d/yy
-            assertTrue(content.contains("10/3/09"));
+            assertContains("10/3/09", content);
             
             // Date/Time Format: m/d/yy h:mm
-            assertTrue(content.contains("1/19/08 4:35"));
+            assertContains("1/19/08 4:35", content);
+
+            // Fraction (2.5): # ?/?
+            assertContains("2 1/2", content);
 
             
             // Below assertions represent outstanding formatting issues to be addressed
@@ -136,13 +140,10 @@ public class ExcelParserTest {
 
             /*************************************************************************
             // Custom Number (0 "dollars and" .00 "cents")
-            assertTrue(content.contains("19 dollars and .99 cents"));
+            assertContains("19 dollars and .99 cents", content);
 
             // Custom Number ("At" h:mm AM/PM "on" dddd mmmm d"," yyyy)
-            assertTrue(content.contains("At 4:20 AM on Thursday May 17, 2007"));
-
-            // Fraction (2.5): # ?/?  (TODO Coming in POI 3.8 beta 6)
-            assertTrue(content.contains("2 1 / 2"));
+            assertContains("At 4:20 AM on Thursday May 17, 2007", content);
             **************************************************************************/
 
         } finally {
@@ -171,21 +172,21 @@ public class ExcelParserTest {
             String content = handler.toString();
             
             // The first sheet has a pie chart
-            assertTrue(content.contains("charttabyodawg"));
-            assertTrue(content.contains("WhamPuff"));
+            assertContains("charttabyodawg", content);
+            assertContains("WhamPuff", content);
             
             // The second sheet has a bar chart and some text
-            assertTrue(content.contains("Sheet1"));
-            assertTrue(content.contains("Test Excel Spreasheet"));
-            assertTrue(content.contains("foo"));
-            assertTrue(content.contains("bar"));
-            assertTrue(content.contains("fizzlepuff"));
-            assertTrue(content.contains("whyaxis"));
-            assertTrue(content.contains("eksaxis"));
+            assertContains("Sheet1", content);
+            assertContains("Test Excel Spreasheet", content);
+            assertContains("foo", content);
+            assertContains("bar", content);
+            assertContains("fizzlepuff", content);
+            assertContains("whyaxis", content);
+            assertContains("eksaxis", content);
             
             // The third sheet has some text
-            assertTrue(content.contains("Sheet2"));
-            assertTrue(content.contains("dingdong"));
+            assertContains("Sheet2", content);
+            assertContains("dingdong", content);
         } finally {
             input.close();
         }
@@ -206,7 +207,7 @@ public class ExcelParserTest {
                     "application/vnd.ms-excel",
                     metadata.get(Metadata.CONTENT_TYPE));
             String content = handler.toString();
-            assertTrue(content.contains("Number Formats"));
+            assertContains("Number Formats", content);
         } finally {
             input.close();
         }
@@ -224,7 +225,7 @@ public class ExcelParserTest {
             new OfficeParser().parse(input, handler, metadata, context);
 
             String content = handler.toString();
-            assertTrue(content.contains("Microsoft Works"));
+            assertContains("Microsoft Works", content);
         } finally {
             input.close();
         }

Modified: 
tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/OutlookParserTest.java
URL: 
http://svn.apache.org/viewvc/tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/OutlookParserTest.java?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
--- 
tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/OutlookParserTest.java
 (original)
+++ 
tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/OutlookParserTest.java
 Sun Aug 31 19:36:36 2014
@@ -249,6 +249,6 @@ public class OutlookParserTest {
         
         // Make sure we don't have nested html docs
         assertEquals(2, content.split("<body>").length);
-        //assertEquals(2, content.split("<\\/body>").length); // TODO Fix
+        assertEquals(2, content.split("<\\/body>").length);
     }
 }

Modified: 
tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/PowerPointParserTest.java
URL: 
http://svn.apache.org/viewvc/tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/PowerPointParserTest.java?rev=1621623&r1=1621622&r2=1621623&view=diff
==============================================================================
--- 
tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/PowerPointParserTest.java
 (original)
+++ 
tika/branches/1.6/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/PowerPointParserTest.java
 Sun Aug 31 19:36:36 2014
@@ -92,20 +92,20 @@ public class PowerPointParserTest extend
         for(int row=1;row<=3;row++) {
             //assertContains("·\tBullet " + row, content);
             //assertContains("\u00b7\tBullet " + row, content);
+            // TODO OfficeParser fails to extract the bullet symbol
             assertContains("Bullet " + row, content);
         }
         assertContains("Here is a numbered list:", content);
         for(int row=1;row<=3;row++) {
             //assertContains(row + ")\tNumber bullet " + row, content);
             //assertContains(row + ") Number bullet " + row, content);
-            // TODO: OOXMLExtractor fails to number the bullets:
+            // TODO: OfficeParser fails to number the bullets:
             assertContains("Number bullet " + row, content);
         }
 
         for(int row=1;row<=2;row++) {
             for(int col=1;col<=3;col++) {
-               // TODO Work out why the upgrade to POI 3.9 broke this test (table text)
-//                assertContains("Row " + row + " Col " + col, content);
+                assertContains("Row " + row + " Col " + col, content);
             }
         }
 
@@ -153,7 +153,10 @@ public class PowerPointParserTest extend
        assertEquals(-1, content.indexOf("*"));
     }
 
-    // TODO: once we fix TIKA-712, re-enable this
+    /**
+     * TIKA-712 Master Slide Text from PPT and PPTX files
+     *  should be extracted too
+     */
     @Test
     public void testMasterText() throws Exception {
         ContentHandler handler = new BodyContentHandler();
@@ -177,7 +180,6 @@ public class PowerPointParserTest extend
         assertEquals(-1, content.indexOf("*"));
     }
 
-    // TODO: once we fix TIKA-712, re-enable this
     @Test
     public void testMasterText2() throws Exception {
         ContentHandler handler = new BodyContentHandler();


Reply via email to