svn commit: r1575116 - in /tika/trunk/tika-parsers/src: main/java/org/apache/tika/parser/pdf/ test/java/org/apache/tika/parser/pdf/

tallison Thu, 06 Mar 2014 17:51:07 -0800

Author: tallison
Date: Fri Mar  7 01:50:34 2014
New Revision: 1575116

URL: http://svn.apache.org/r1575116
Log:
clean up whitespace in PDFParser components


Modified:
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParserConfig.java
    tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java

Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java?rev=1575116&r1=1575115&r2=1575116&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java Fri Mar  7 01:50:34 2014
@@ -191,11 +191,11 @@ class PDF2XHTML extends PDFTextStripper 
             extractEmbeddedDocuments(pdf, originalHandler);
             
             //extract acroform data at end of doc
-            if (config.getExtractAcroFormContent() == true){
+            if (config.getExtractAcroFormContent() == true) {
                 extractAcroForm(pdf, handler);
              }
             handler.endDocument();
-        } catch (TikaException e){
+        } catch (TikaException e) {
            throw new IOExceptionWithCause("Unable to end a document", e);
         } catch (SAXException e) {
             throw new IOExceptionWithCause("Unable to end a document", e);
@@ -350,7 +350,7 @@ class PDF2XHTML extends PDFTextStripper 
             throws IOException, SAXException, TikaException {
         PDDocumentCatalog catalog = document.getDocumentCatalog();
         PDDocumentNameDictionary names = catalog.getNames();
-        if (names == null){
+        if (names == null) {
             return;
         }
         PDEmbeddedFilesNameTreeNode embeddedFiles = names.getEmbeddedFiles();
@@ -369,16 +369,16 @@ class PDF2XHTML extends PDFTextStripper 
         //This code follows: pdfbox/examples/pdmodel/ExtractEmbeddedFiles.java
         //If there is a need we could add a fully recursive search to find a non-null
         //Map<String, COSObjectable> that contains the doc info.
-        if (embeddedFileNames != null){
+        if (embeddedFileNames != null) {
             processEmbeddedDocNames(embeddedFileNames, embeddedExtractor);
         } else {
             List<PDNameTreeNode> kids = embeddedFiles.getKids();
-            if (kids == null){
+            if (kids == null) {
                 return;
             }
-            for (PDNameTreeNode n : kids){
+            for (PDNameTreeNode n : kids) {
                 Map<String, COSObjectable> childNames = n.getNames();
-                if (childNames != null){
+                if (childNames != null) {
                     processEmbeddedDocNames(childNames, embeddedExtractor);
                 }
             }
@@ -388,7 +388,7 @@ class PDF2XHTML extends PDFTextStripper 
 
     private void processEmbeddedDocNames(Map<String, COSObjectable> 
embeddedFileNames, 
         EmbeddedDocumentExtractor embeddedExtractor) throws IOException, 
SAXException, TikaException {
-        if (embeddedFileNames == null){
+        if (embeddedFileNames == null) {
             return;
         }
         for (Map.Entry<String,COSObjectable> ent : 
embeddedFileNames.entrySet()) {
@@ -414,6 +414,7 @@ class PDF2XHTML extends PDFTextStripper 
             }
         }
     }
+
     private void extractAcroForm(PDDocument pdf, XHTMLContentHandler handler) 
throws IOException, 
     SAXException {
         //Thank you, Ben Litchfield, for org.apache.pdfbox.examples.fdf.PrintFields
@@ -426,63 +427,66 @@ class PDF2XHTML extends PDFTextStripper 
         PDAcroForm form = catalog.getAcroForm();
         if (form == null)
             return;
-        
+
         @SuppressWarnings("rawtypes")
         List fields = form.getFields();
 
         if (fields == null)
-           return;
-        
+            return;
+
         @SuppressWarnings("rawtypes")
         ListIterator itr  = fields.listIterator();
 
         if (itr == null)
-           return;
+            return;
 
         handler.startElement("div", "class", "acroform");
         handler.startElement("ol");
-        while (itr.hasNext()){
-           Object obj = itr.next();
-           if (obj != null && obj instanceof PDField){
-              processAcroField((PDField)obj, handler, 0);
-           }
+
+        while (itr.hasNext()) {
+            Object obj = itr.next();
+            if (obj != null && obj instanceof PDField) {
+                processAcroField((PDField)obj, handler, 0);
+            }
         }
         handler.endElement("ol");
         handler.endElement("div");
     }
-    
+
     private void processAcroField(PDField field, XHTMLContentHandler handler, 
final int recurseDepth)
             throws SAXException, IOException { 
-         
-          if (recurseDepth >= MAX_ACROFORM_RECURSIONS){
-             return;
-          }
-          
-          addFieldString(field, handler);
-          
-          @SuppressWarnings("rawtypes")
-          List kids = field.getKids();
-          if(kids != null){
-             
-             @SuppressWarnings("rawtypes")
-             Iterator kidsIter = kids.iterator();
-             if (kidsIter == null){
+
+        if (recurseDepth >= MAX_ACROFORM_RECURSIONS) {
+            return;
+        }
+
+        addFieldString(field, handler);
+
+        @SuppressWarnings("rawtypes")
+        List kids = field.getKids();
+        if(kids != null) {
+
+            @SuppressWarnings("rawtypes")
+            Iterator kidsIter = kids.iterator();
+            if (kidsIter == null) {
                 return;
-             }
-             int r = recurseDepth+1;
-             handler.startElement("ol");
-             while(kidsIter.hasNext()){
+            }
+            int r = recurseDepth+1;
+            handler.startElement("ol");
+            //TODO: can generate <ol/>. Rework to avoid that.
+            while(kidsIter.hasNext()) {
                 Object pdfObj = kidsIter.next();
-                if(pdfObj != null && pdfObj instanceof PDField){
-                   PDField kid = (PDField)pdfObj;
-                   //recurse
-                   processAcroField(kid, handler, r);
+                if(pdfObj != null && pdfObj instanceof PDField) {
+                    PDField kid = (PDField)pdfObj;
+                    //recurse
+                    processAcroField(kid, handler, r);
                 }
-             }
-             handler.endElement("ol");
-          }
-      }
-    private void addFieldString(PDField field, XHTMLContentHandler handler) 
throws SAXException{
+            }
+            handler.endElement("ol");
+        }
+    }
+
+    private void addFieldString(PDField field, XHTMLContentHandler handler) 
throws SAXException {
         //Pick partial name to present in content and altName for attribute
         //Ignoring FullyQualifiedName for now
         String partName = field.getPartialName();
@@ -491,28 +495,28 @@ class PDF2XHTML extends PDFTextStripper 
         StringBuilder sb = new StringBuilder();
         AttributesImpl attrs = new AttributesImpl();
 
-        if (partName != null){
+        if (partName != null) {
             sb.append(partName).append(": ");
         }
-        if (altName != null){
+        if (altName != null) {
             attrs.addAttribute("", "altName", "altName", "CDATA", altName);
         }
         //return early if PDSignature field
-        if (field instanceof PDSignatureField){
+        if (field instanceof PDSignatureField) {
             handleSignature(attrs, (PDSignatureField)field, handler);
             return;
         }
         try {
             //getValue can throw an IOException if there is no value
             String value = field.getValue();
-            if (value != null && ! value.equals("null")){
+            if (value != null && ! value.equals("null")) {
                 sb.append(value);
             }
         } catch (IOException e) {
             //swallow
         }
 
-        if (attrs.getLength() > 0 || sb.length() > 0){
+        if (attrs.getLength() > 0 || sb.length() > 0) {
             handler.startElement("li", attrs);
             handler.characters(sb.toString());
             handler.endElement("li");
@@ -520,11 +524,11 @@ class PDF2XHTML extends PDFTextStripper 
     }
 
     private void handleSignature(AttributesImpl parentAttributes, 
PDSignatureField sigField,
-            XHTMLContentHandler handler) throws SAXException{
-       
+            XHTMLContentHandler handler) throws SAXException {
+
 
         PDSignature sig = sigField.getSignature();
-        if (sig == null){
+        if (sig == null) {
             return;
         }
         Map<String, String> vals= new TreeMap<String, String>();
@@ -534,27 +538,27 @@ class PDF2XHTML extends PDFTextStripper 
         vals.put("reason", sig.getReason());
 
         Calendar cal = sig.getSignDate();
-        if (cal != null){
+        if (cal != null) {
             dateFormat.setTimeZone(cal.getTimeZone());
             vals.put("date", dateFormat.format(cal.getTime()));
         }
         //see if there is any data
         int nonNull = 0;
-        for (String val : vals.keySet()){
-            if (val != null && ! val.equals("")){
+        for (String val : vals.keySet()) {
+            if (val != null && ! val.equals("")) {
                 nonNull++;
             }
         }
         //if there is, process it
-        if (nonNull > 0){
+        if (nonNull > 0) {
             handler.startElement("li", parentAttributes);
 
             AttributesImpl attrs = new AttributesImpl();
             attrs.addAttribute("", "type", "type", "CDATA", "signaturedata");
 
             handler.startElement("ol", attrs);
-            for (Map.Entry<String, String> e : vals.entrySet()){
-                if (e.getValue() == null || e.getValue().equals("")){
+            for (Map.Entry<String, String> e : vals.entrySet()) {
+                if (e.getValue() == null || e.getValue().equals("")) {
                     continue;
                 }
                 attrs = new AttributesImpl();
@@ -568,3 +572,4 @@ class PDF2XHTML extends PDFTextStripper 
         }
     }
 }
+

Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java?rev=1575116&r1=1575115&r2=1575116&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java Fri Mar  7 01:50:34 2014
@@ -105,14 +105,14 @@ public class PDFParser extends AbstractP
             if (tstream != null && tstream.hasFile()) {
                 // File based, take that as a cue to use a temporary file
                 RandomAccess scratchFile = new 
RandomAccessFile(tmp.createTemporaryFile(), "rw");
-                if (localConfig.getUseNonSequentialParser() == true){
+                if (localConfig.getUseNonSequentialParser() == true) {
                     pdfDocument = PDDocument.loadNonSeq(new 
CloseShieldInputStream(stream), scratchFile);
                 } else {
                     pdfDocument = PDDocument.load(new 
CloseShieldInputStream(stream), scratchFile, true);
                 }
             } else {
                 // Go for the normal, stream based in-memory parsing
-                if (localConfig.getUseNonSequentialParser() == true){
+                if (localConfig.getUseNonSequentialParser() == true) {
                     pdfDocument = PDDocument.loadNonSeq(new 
CloseShieldInputStream(stream), new RandomAccessBuffer()); 
                 } else {
                     pdfDocument = PDDocument.load(new 
CloseShieldInputStream(stream), true);
@@ -166,13 +166,13 @@ public class PDFParser extends AbstractP
         org.apache.jempbox.xmp.XMPMetadata xmp = null;
         XMPSchemaDublinCore dcSchema = null;
         try{
-            if (document.getDocumentCatalog().getMetadata() != null){
+            if (document.getDocumentCatalog().getMetadata() != null) {
                 xmp = 
document.getDocumentCatalog().getMetadata().exportXMPMetadata();
             }
-            if (xmp != null){
+            if (xmp != null) {
                 dcSchema = xmp.getDublinCoreSchema();
             }
-        } catch (IOException e){
+        } catch (IOException e) {
             //swallow
         }
         PDDocumentInformation info = document.getDocumentInformation();
@@ -194,7 +194,7 @@ public class PDFParser extends AbstractP
             addMetadata(metadata, TikaCoreProperties.CREATED, 
info.getCreationDate());
         } catch (IOException e) {
             // Invalid date format, just ignore
-        } catch (StringIndexOutOfBoundsException e){
+        } catch (StringIndexOutOfBoundsException e) {
             //remove after PDFBOX-1803 is fixed (TIKA-1233)
             // Invalid date format, just ignore
         }
@@ -204,7 +204,7 @@ public class PDFParser extends AbstractP
             addMetadata(metadata, TikaCoreProperties.MODIFIED, modified);
         } catch (IOException e) {
             // Invalid date format, just ignore
-        } catch (StringIndexOutOfBoundsException e){
+        } catch (StringIndexOutOfBoundsException e) {
             //remove after PDFBOX-1803 is fixed (TIKA-1233)
             // Invalid date format, just ignore
         }
@@ -258,11 +258,11 @@ public class PDFParser extends AbstractP
                 // If it's an Adobe one, interpret it to determine the 
extension level:
                 if( extName.equals( COSName.getPDFName("ADBE") )) {
                     COSDictionary adobeExt = (COSDictionary) 
extensions.getDictionaryObject(extName);
-                    if( adobeExt != null ){
+                    if( adobeExt != null ) {
                         String baseVersion = 
adobeExt.getNameAsString(COSName.getPDFName("BaseVersion"));
                         int el = 
adobeExt.getInt(COSName.getPDFName("ExtensionLevel"));
                         //-1 is sentinel value that something went wrong in 
getInt
-                        if (el != -1){
+                        if (el != -1) {
                             metadata.set("pdf:PDFExtensionVersion", 
baseVersion+" Adobe Extension Level "+el );
                             metadata.add(TikaCoreProperties.FORMAT.getName(), 
                                 MEDIA_TYPE.toString()+"; 
version=\""+baseVersion+" Adobe Extension Level "+el+"\"");
@@ -287,21 +287,21 @@ public class PDFParser extends AbstractP
      */
     private void extractMultilingualItems(Metadata metadata, Property property,
             String pdfBoxBaseline, XMPSchema schema) {
-        if (schema == null){
-            if (pdfBoxBaseline != null && pdfBoxBaseline.length() > 0){
+        if (schema == null) {
+            if (pdfBoxBaseline != null && pdfBoxBaseline.length() > 0) {
                 metadata.add(property, pdfBoxBaseline);
             }
             return;
         }
         
-        for (String lang : 
schema.getLanguagePropertyLanguages(property.getName())){
+        for (String lang : 
schema.getLanguagePropertyLanguages(property.getName())) {
             String value = schema.getLanguageProperty(property.getName(), 
lang);
             if (value != null && pdfBoxBaseline != null 
-                    && ! value.equals(pdfBoxBaseline) && value.length() > 0){
+                    && ! value.equals(pdfBoxBaseline) && value.length() > 0) {
                 metadata.add(property, value);
             }
         }
-        if (pdfBoxBaseline != null && pdfBoxBaseline.length() > 0){
+        if (pdfBoxBaseline != null && pdfBoxBaseline.length() > 0) {
             metadata.add(property,  pdfBoxBaseline);
         }
     }
@@ -327,28 +327,28 @@ public class PDFParser extends AbstractP
      * @param metadata
      */
     private void extractDublinCoreListItems(Metadata metadata, Property 
property, 
-            String pdfBoxBaseline, XMPSchemaDublinCore dc){
+            String pdfBoxBaseline, XMPSchemaDublinCore dc) {
         //if no dc, add baseline and return
-        if (dc == null){
-            if (pdfBoxBaseline != null && pdfBoxBaseline.length() > 0){
+        if (dc == null) {
+            if (pdfBoxBaseline != null && pdfBoxBaseline.length() > 0) {
                 addMetadata(metadata, property, pdfBoxBaseline);
             }
             return;
         }
         List<String> items = getXMPBagOrSeqList(dc, property.getName());
-        if (items == null){
-            if (pdfBoxBaseline != null && pdfBoxBaseline.length() > 0){
+        if (items == null) {
+            if (pdfBoxBaseline != null && pdfBoxBaseline.length() > 0) {
                 addMetadata(metadata, property, pdfBoxBaseline);
             }
             return;
         }
-        for (String item : items){
-            if (pdfBoxBaseline != null && ! item.equals(pdfBoxBaseline)){
+        for (String item : items) {
+            if (pdfBoxBaseline != null && ! item.equals(pdfBoxBaseline)) {
                 addMetadata(metadata, property, item);
             }
         }
         //finally, add the baseline
-        if (pdfBoxBaseline != null && pdfBoxBaseline.length() > 0){
+        if (pdfBoxBaseline != null && pdfBoxBaseline.length() > 0) {
             addMetadata(metadata, property, pdfBoxBaseline);
         }    
     }
@@ -363,9 +363,9 @@ public class PDFParser extends AbstractP
      * @param name
      * @return list of values or null
      */
-    private List<String> getXMPBagOrSeqList(XMPSchema schema, String name){
+    private List<String> getXMPBagOrSeqList(XMPSchema schema, String name) {
         List<String> ret = schema.getBagList(name);
-        if (ret == null){
+        if (ret == null) {
             ret = schema.getSequenceList(name);
         }
         return ret;
@@ -406,16 +406,16 @@ public class PDFParser extends AbstractP
             }
         } else if(value instanceof COSString) {
             addMetadata(metadata, name, ((COSString)value).getString());
-        } else if (value != null){
+        } else if (value != null) {
             addMetadata(metadata, name, value.toString());
         }
     }
 
-    public void setPDFParserConfig(PDFParserConfig config){
+    public void setPDFParserConfig(PDFParserConfig config) {
         this.defaultConfig = config;
     }
     
-    public PDFParserConfig getPDFParserConfig(){
+    public PDFParserConfig getPDFParserConfig() {
         return defaultConfig;
     }
     
@@ -426,7 +426,7 @@ public class PDFParser extends AbstractP
      * 
      * @deprecated use {@link #setPDFParserConfig(PDFParserConfig)}
      */
-    public void setUseNonSequentialParser(boolean v){
+    public void setUseNonSequentialParser(boolean v) {
         defaultConfig.setUseNonSequentialParser(v);
     }
     
@@ -434,7 +434,7 @@ public class PDFParser extends AbstractP
      * @see #setUseNonSequentialParser(boolean) 
      * @deprecated use {@link #getPDFParserConfig()}
      */
-    public boolean getUseNonSequentialParser(){
+    public boolean getUseNonSequentialParser() {
         return defaultConfig.getUseNonSequentialParser();
     }
     

Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParserConfig.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParserConfig.java?rev=1575116&r1=1575115&r2=1575116&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParserConfig.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParserConfig.java Fri Mar  7 01:50:34 2014
@@ -63,7 +63,7 @@ public class PDFParserConfig implements 
     //True if acroform content should be extracted
     private boolean extractAcroFormContent = true;
 
-    public PDFParserConfig(){
+    public PDFParserConfig() {
         init(this.getClass().getResourceAsStream("PDFParser.properties"));
     }
 
@@ -74,25 +74,25 @@ public class PDFParserConfig implements 
      * 
      * @param is
      */
-    public PDFParserConfig(InputStream is){
+    public PDFParserConfig(InputStream is) {
         init(is);
     }
 
     //initializes object and then tries to close inputstream
-    private void init(InputStream is){
+    private void init(InputStream is) {
 
-        if (is == null){
+        if (is == null) {
             return;
         }
         Properties props = new Properties();
-        try{
+        try {
             props.load(is);
-        } catch (IOException e){
+        } catch (IOException e) {
         } finally {
-            if (is != null){
+            if (is != null) {
                 try{
                     is.close();
-                } catch (IOException e){
+                } catch (IOException e) {
                     //swallow
                 }
             }
@@ -219,9 +219,9 @@ public class PDFParserConfig implements 
         if (p == null){
             return defaultMissing;
         }
-        if (p.toLowerCase().equals("true")){
+        if (p.toLowerCase().equals("true")) {
             return true;
-        } else if (p.toLowerCase().equals("false")){
+        } else if (p.toLowerCase().equals("false")) {
             return false;
         } else {
             return defaultMissing;
@@ -275,7 +275,4 @@ public class PDFParserConfig implements 
                 + ", useNonSequentialParser=" + useNonSequentialParser
                 + ", extractAcroFormContent=" + extractAcroFormContent + "]";
     }
-
-
-
 }

Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java?rev=1575116&r1=1575115&r2=1575116&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java Fri Mar  7 01:50:34 2014
@@ -478,7 +478,7 @@ public class PDFParserTest extends TikaT
        ParseContext context = new ParseContext();
        String content = "";
        InputStream stream = null;
-       try{
+       try {
           context.set(org.apache.tika.parser.Parser.class, parser);
           stream = 
getResourceAsStream("/test-documents/testPDFEmbeddingAndEmbedded.docx");
           parser.parse(stream, handler, metadata, context);
@@ -540,8 +540,8 @@ public class PDFParserTest extends TikaT
         //empty for now
         Set<String> knownContentDiffs = new HashSet<String>();
 
-        for (File f : testDocs.listFiles()){
-            if (! f.getName().toLowerCase().endsWith(".pdf")){
+        for (File f : testDocs.listFiles()) {
+            if (! f.getName().toLowerCase().endsWith(".pdf")) {
                 continue;
             }
 
@@ -552,14 +552,14 @@ public class PDFParserTest extends TikaT
             Metadata sequentialMetadata = new Metadata();
             String sequentialContent = getText(new FileInputStream(f), 
sequentialParser, context, sequentialMetadata);
 
-            if (knownContentDiffs.contains(f.getName())){
+            if (knownContentDiffs.contains(f.getName())) {
                 assertFalse(f.getName(), 
defaultContent.equals(sequentialContent));
             } else {
                 assertEquals(f.getName(), defaultContent, sequentialContent);
             }
 
             //skip this one file.
-            if (knownMetadataDiffs.contains(f.getName())){
+            if (knownMetadataDiffs.contains(f.getName())) {
                 //turn back on once PDFBOX-1922 is fixed
                 //assertFalse(f.getName(), 
defaultMetadata.equals(sequentialMetadata));
             } else {
@@ -624,7 +624,7 @@ public class PDFParserTest extends TikaT
 */
 
     //TIKA-1226
-    public void testSignatureInAcroForm() throws Exception{
+    public void testSignatureInAcroForm() throws Exception {
         //The current test doc does not contain any content in the signature area.
         //This just tests that a RuntimeException is not thrown.
         //TODO: find a better test file for this issue.
@@ -641,12 +641,12 @@ public class PDFParserTest extends TikaT
         TrackingHandler tracker = new TrackingHandler();
         TikaInputStream tis = null;
         ContainerExtractor ex = new ParserContainerExtractor();
-        try{
+        try {
             tis= TikaInputStream.get(
                 
getResourceAsStream("/test-documents/testPDF_childAttachments.pdf"));
             ex.extract(tis, ex, tracker);
         } finally {
-            if (tis != null){
+            if (tis != null) {
                 tis.close();
             }
         }
@@ -658,7 +658,7 @@ public class PDFParserTest extends TikaT
         assertEquals(TYPE_DOC, tracker.mediaTypes.get(1));
     }
 
-    public void testVersions() throws Exception{
+    public void testVersions() throws Exception {
         
         Map<String, String> dcFormat = new HashMap<String, String>();
         dcFormat.put("4.x", "application/pdf; version=1.3");
@@ -686,7 +686,7 @@ public class PDFParserTest extends TikaT
         pdfExtensionVersions.put("11.x.PDFA-1b", "1.7 Adobe Extension Level 
8");
 
         Parser p = new AutoDetectParser();
-        for (Map.Entry<String, String> e : dcFormat.entrySet()){
+        for (Map.Entry<String, String> e : dcFormat.entrySet()) {
             String fName = "testPDF_Version."+e.getKey()+".pdf";
             InputStream is = PDFParserTest.class.getResourceAsStream(
                     "/test-documents/"+fName);
@@ -697,14 +697,14 @@ public class PDFParserTest extends TikaT
             is.close();
             boolean foundDC = false;
             String[] vals = m.getValues("dc:format");
-            for (String v : vals){
-                if (v.equals(e.getValue())){
+            for (String v : vals) {
+                if (v.equals(e.getValue())) {
                     foundDC = true;
                 }
             }
             assertTrue("dc:format ::" + e.getValue(), foundDC);
             String extensionVersionTruth = 
pdfExtensionVersions.get(e.getKey());
-            if (extensionVersionTruth != null){
+            if (extensionVersionTruth != null) {
                 assertEquals("pdf:PDFExtensionVersion :: 
"+extensionVersionTruth,
                         extensionVersionTruth, 
                         m.get("pdf:PDFExtensionVersion"));
@@ -722,14 +722,14 @@ public class PDFParserTest extends TikaT
         p.parse(is, h, m, c);
         is.close();
         Set<String> versions = new HashSet<String>();
-        for (String fmt : m.getValues("dc:format")){
+        for (String fmt : m.getValues("dc:format")) {
             versions.add(fmt);
         }
         
         for (String hit : new String[]{ "application/pdf; version=1.7",
           "application/pdf; version=\"A-1b\"",
           "application/pdf; version=\"1.7 Adobe Extension Level 8\""
-        }){
+        }) {
             assertTrue(hit, versions.contains(hit));
         }
         
@@ -748,14 +748,14 @@ public class PDFParserTest extends TikaT
         p.parse(is, h, m, c);
         is.close();
         
-        String[] keys = new String[]{
+        String[] keys = new String[] {
                 "dc:creator",
                 "meta:author",
                 "creator",
                 "Author"
         };
 
-        for (String k : keys){
+        for (String k : keys) {
             String[] vals = m.getValues(k);
             assertEquals("number of authors == 2 for key: "+ k, 2, 
vals.length);
             Set<String> set = new HashSet<String>();
@@ -765,6 +765,7 @@ public class PDFParserTest extends TikaT
             assertTrue("Sample Author 2", set.contains("Sample Author 2"));
         }
     }
+
     /**
      * This is a workaround until PDFBox-1922 is fixed.
      * The goal is to test for equality but skip the version issue.
@@ -782,16 +783,16 @@ public class PDFParserTest extends TikaT
         
         assertEquals("metadata length: "+fName, thisNames.length, 
thatMetadata.names().length);
         
-        for (String n : thisNames){
+        for (String n : thisNames) {
             //don't pay attention to differences here for now
-            if (n.equals("pdf:PDFVersion") || n.equals("dc:format")){
+            if (n.equals("pdf:PDFVersion") || n.equals("dc:format")) {
                 continue;
             }
-            if (thisMetadata.isMultiValued(n) && 
thatMetadata.isMultiValued(n)){
+            if (thisMetadata.isMultiValued(n) && 
thatMetadata.isMultiValued(n)) {
                 String[] thisValues = thisMetadata.getValues(n);
                 String[] thatValues = thatMetadata.getValues(n);
                 testEqualMetadataValue(fName, thisValues, thatValues);
-            } else if (! thisMetadata.isMultiValued(n) && ! 
thatMetadata.isMultiValued(n)){
+            } else if (! thisMetadata.isMultiValued(n) && ! 
thatMetadata.isMultiValued(n)) {
                 assertEquals("unequal multivalued values: " + fName, 
thisMetadata.get(n), thatMetadata.get(n));
             } else {
                 //one is multivalued and the other isn't
@@ -800,15 +801,15 @@ public class PDFParserTest extends TikaT
         }
     }
     
-    private void testEqualMetadataValue(String fName, String[] thisValues, 
String[] thatValues){
+    private void testEqualMetadataValue(String fName, String[] thisValues, 
String[] thatValues) {
         assertTrue("null equality of metadata values: "+fName, 
                 (thisValues == null && thatValues == null) ||
                 (thisValues != null && thatValues != null));
 
         assertEquals("metadata values length: "+fName, thisValues.length, 
thatValues.length);
         List<String> list = Arrays.asList(thatValues);
-        for (String v : thisValues){
-            if (! list.contains(v)){
+        for (String v : thisValues) {
+            if (! list.contains(v)) {
                 assertTrue("metadata value; that doesn't contain" + v, false);
             }
         }

svn commit: r1575116 - in /tika/trunk/tika-parsers/src: main/java/org/apache/tika/parser/pdf/ test/java/org/apache/tika/parser/pdf/

Reply via email to