Author: tallison
Date: Fri Mar 7 01:50:34 2014
New Revision: 1575116
URL: http://svn.apache.org/r1575116
Log: clean up whitespace in PDFParser components
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParserConfig.java tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java?rev=1575116&r1=1575115&r2=1575116&view=diff ============================================================================== --- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java (original) +++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java Fri Mar 7 01:50:34 2014 @@ -191,11 +191,11 @@ class PDF2XHTML extends PDFTextStripper extractEmbeddedDocuments(pdf, originalHandler); //extract acroform data at end of doc - if (config.getExtractAcroFormContent() == true){ + if (config.getExtractAcroFormContent() == true) { extractAcroForm(pdf, handler); } handler.endDocument(); - } catch (TikaException e){ + } catch (TikaException e) { throw new IOExceptionWithCause("Unable to end a document", e); } catch (SAXException e) { throw new IOExceptionWithCause("Unable to end a document", e); @@ -350,7 +350,7 @@ class PDF2XHTML extends PDFTextStripper throws IOException, SAXException, TikaException { PDDocumentCatalog catalog = document.getDocumentCatalog(); PDDocumentNameDictionary names = catalog.getNames(); - if (names == null){ + if (names == null) { return; } PDEmbeddedFilesNameTreeNode embeddedFiles = names.getEmbeddedFiles(); @@ -369,16 +369,16 @@ class PDF2XHTML extends PDFTextStripper //This code follows: pdfbox/examples/pdmodel/ExtractEmbeddedFiles.java //If there is a need we could add a fully recursive search to find a non-null //Map<String, COSObjectable> that contains the doc 
info. - if (embeddedFileNames != null){ + if (embeddedFileNames != null) { processEmbeddedDocNames(embeddedFileNames, embeddedExtractor); } else { List<PDNameTreeNode> kids = embeddedFiles.getKids(); - if (kids == null){ + if (kids == null) { return; } - for (PDNameTreeNode n : kids){ + for (PDNameTreeNode n : kids) { Map<String, COSObjectable> childNames = n.getNames(); - if (childNames != null){ + if (childNames != null) { processEmbeddedDocNames(childNames, embeddedExtractor); } } @@ -388,7 +388,7 @@ class PDF2XHTML extends PDFTextStripper private void processEmbeddedDocNames(Map<String, COSObjectable> embeddedFileNames, EmbeddedDocumentExtractor embeddedExtractor) throws IOException, SAXException, TikaException { - if (embeddedFileNames == null){ + if (embeddedFileNames == null) { return; } for (Map.Entry<String,COSObjectable> ent : embeddedFileNames.entrySet()) { @@ -414,6 +414,7 @@ class PDF2XHTML extends PDFTextStripper } } } + private void extractAcroForm(PDDocument pdf, XHTMLContentHandler handler) throws IOException, SAXException { //Thank you, Ben Litchfield, for org.apache.pdfbox.examples.fdf.PrintFields @@ -426,63 +427,66 @@ class PDF2XHTML extends PDFTextStripper PDAcroForm form = catalog.getAcroForm(); if (form == null) return; - + @SuppressWarnings("rawtypes") List fields = form.getFields(); if (fields == null) - return; - + return; + @SuppressWarnings("rawtypes") ListIterator itr = fields.listIterator(); if (itr == null) - return; + return; handler.startElement("div", "class", "acroform"); handler.startElement("ol"); - while (itr.hasNext()){ - Object obj = itr.next(); - if (obj != null && obj instanceof PDField){ - processAcroField((PDField)obj, handler, 0); - } + + while (itr.hasNext()) { + Object obj = itr.next(); + if (obj != null && obj instanceof PDField) { + processAcroField((PDField)obj, handler, 0); + } } handler.endElement("ol"); handler.endElement("div"); } - + private void processAcroField(PDField field, XHTMLContentHandler handler, 
final int recurseDepth) throws SAXException, IOException { - - if (recurseDepth >= MAX_ACROFORM_RECURSIONS){ - return; - } - - addFieldString(field, handler); - - @SuppressWarnings("rawtypes") - List kids = field.getKids(); - if(kids != null){ - - @SuppressWarnings("rawtypes") - Iterator kidsIter = kids.iterator(); - if (kidsIter == null){ + + if (recurseDepth >= MAX_ACROFORM_RECURSIONS) { + return; + } + + addFieldString(field, handler); + + @SuppressWarnings("rawtypes") + List kids = field.getKids(); + if(kids != null) { + + @SuppressWarnings("rawtypes") + Iterator kidsIter = kids.iterator(); + if (kidsIter == null) { return; - } - int r = recurseDepth+1; - handler.startElement("ol"); - while(kidsIter.hasNext()){ + } + int r = recurseDepth+1; + handler.startElement("ol"); + //TODO: can generate <ol/>. Rework to avoid that. + while(kidsIter.hasNext()) { Object pdfObj = kidsIter.next(); - if(pdfObj != null && pdfObj instanceof PDField){ - PDField kid = (PDField)pdfObj; - //recurse - processAcroField(kid, handler, r); + if(pdfObj != null && pdfObj instanceof PDField) { + PDField kid = (PDField)pdfObj; + //recurse + processAcroField(kid, handler, r); } - } - handler.endElement("ol"); - } - } - private void addFieldString(PDField field, XHTMLContentHandler handler) throws SAXException{ + } + handler.endElement("ol"); + } + } + + private void addFieldString(PDField field, XHTMLContentHandler handler) throws SAXException { //Pick partial name to present in content and altName for attribute //Ignoring FullyQualifiedName for now String partName = field.getPartialName(); @@ -491,28 +495,28 @@ class PDF2XHTML extends PDFTextStripper StringBuilder sb = new StringBuilder(); AttributesImpl attrs = new AttributesImpl(); - if (partName != null){ + if (partName != null) { sb.append(partName).append(": "); } - if (altName != null){ + if (altName != null) { attrs.addAttribute("", "altName", "altName", "CDATA", altName); } //return early if PDSignature field - if (field instanceof 
PDSignatureField){ + if (field instanceof PDSignatureField) { handleSignature(attrs, (PDSignatureField)field, handler); return; } try { //getValue can throw an IOException if there is no value String value = field.getValue(); - if (value != null && ! value.equals("null")){ + if (value != null && ! value.equals("null")) { sb.append(value); } } catch (IOException e) { //swallow } - if (attrs.getLength() > 0 || sb.length() > 0){ + if (attrs.getLength() > 0 || sb.length() > 0) { handler.startElement("li", attrs); handler.characters(sb.toString()); handler.endElement("li"); @@ -520,11 +524,11 @@ class PDF2XHTML extends PDFTextStripper } private void handleSignature(AttributesImpl parentAttributes, PDSignatureField sigField, - XHTMLContentHandler handler) throws SAXException{ - + XHTMLContentHandler handler) throws SAXException { + PDSignature sig = sigField.getSignature(); - if (sig == null){ + if (sig == null) { return; } Map<String, String> vals= new TreeMap<String, String>(); @@ -534,27 +538,27 @@ class PDF2XHTML extends PDFTextStripper vals.put("reason", sig.getReason()); Calendar cal = sig.getSignDate(); - if (cal != null){ + if (cal != null) { dateFormat.setTimeZone(cal.getTimeZone()); vals.put("date", dateFormat.format(cal.getTime())); } //see if there is any data int nonNull = 0; - for (String val : vals.keySet()){ - if (val != null && ! val.equals("")){ + for (String val : vals.keySet()) { + if (val != null && ! 
val.equals("")) { nonNull++; } } //if there is, process it - if (nonNull > 0){ + if (nonNull > 0) { handler.startElement("li", parentAttributes); AttributesImpl attrs = new AttributesImpl(); attrs.addAttribute("", "type", "type", "CDATA", "signaturedata"); handler.startElement("ol", attrs); - for (Map.Entry<String, String> e : vals.entrySet()){ - if (e.getValue() == null || e.getValue().equals("")){ + for (Map.Entry<String, String> e : vals.entrySet()) { + if (e.getValue() == null || e.getValue().equals("")) { continue; } attrs = new AttributesImpl(); @@ -568,3 +572,4 @@ class PDF2XHTML extends PDFTextStripper } } } + Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java?rev=1575116&r1=1575115&r2=1575116&view=diff ============================================================================== --- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java (original) +++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java Fri Mar 7 01:50:34 2014 @@ -105,14 +105,14 @@ public class PDFParser extends AbstractP if (tstream != null && tstream.hasFile()) { // File based, take that as a cue to use a temporary file RandomAccess scratchFile = new RandomAccessFile(tmp.createTemporaryFile(), "rw"); - if (localConfig.getUseNonSequentialParser() == true){ + if (localConfig.getUseNonSequentialParser() == true) { pdfDocument = PDDocument.loadNonSeq(new CloseShieldInputStream(stream), scratchFile); } else { pdfDocument = PDDocument.load(new CloseShieldInputStream(stream), scratchFile, true); } } else { // Go for the normal, stream based in-memory parsing - if (localConfig.getUseNonSequentialParser() == true){ + if (localConfig.getUseNonSequentialParser() == true) { pdfDocument = PDDocument.loadNonSeq(new CloseShieldInputStream(stream), new RandomAccessBuffer()); } else { pdfDocument = 
PDDocument.load(new CloseShieldInputStream(stream), true); @@ -166,13 +166,13 @@ public class PDFParser extends AbstractP org.apache.jempbox.xmp.XMPMetadata xmp = null; XMPSchemaDublinCore dcSchema = null; try{ - if (document.getDocumentCatalog().getMetadata() != null){ + if (document.getDocumentCatalog().getMetadata() != null) { xmp = document.getDocumentCatalog().getMetadata().exportXMPMetadata(); } - if (xmp != null){ + if (xmp != null) { dcSchema = xmp.getDublinCoreSchema(); } - } catch (IOException e){ + } catch (IOException e) { //swallow } PDDocumentInformation info = document.getDocumentInformation(); @@ -194,7 +194,7 @@ public class PDFParser extends AbstractP addMetadata(metadata, TikaCoreProperties.CREATED, info.getCreationDate()); } catch (IOException e) { // Invalid date format, just ignore - } catch (StringIndexOutOfBoundsException e){ + } catch (StringIndexOutOfBoundsException e) { //remove after PDFBOX-1803 is fixed (TIKA-1233) // Invalid date format, just ignore } @@ -204,7 +204,7 @@ public class PDFParser extends AbstractP addMetadata(metadata, TikaCoreProperties.MODIFIED, modified); } catch (IOException e) { // Invalid date format, just ignore - } catch (StringIndexOutOfBoundsException e){ + } catch (StringIndexOutOfBoundsException e) { //remove after PDFBOX-1803 is fixed (TIKA-1233) // Invalid date format, just ignore } @@ -258,11 +258,11 @@ public class PDFParser extends AbstractP // If it's an Adobe one, interpret it to determine the extension level: if( extName.equals( COSName.getPDFName("ADBE") )) { COSDictionary adobeExt = (COSDictionary) extensions.getDictionaryObject(extName); - if( adobeExt != null ){ + if( adobeExt != null ) { String baseVersion = adobeExt.getNameAsString(COSName.getPDFName("BaseVersion")); int el = adobeExt.getInt(COSName.getPDFName("ExtensionLevel")); //-1 is sentinel value that something went wrong in getInt - if (el != -1){ + if (el != -1) { metadata.set("pdf:PDFExtensionVersion", baseVersion+" Adobe Extension Level 
"+el ); metadata.add(TikaCoreProperties.FORMAT.getName(), MEDIA_TYPE.toString()+"; version=\""+baseVersion+" Adobe Extension Level "+el+"\""); @@ -287,21 +287,21 @@ public class PDFParser extends AbstractP */ private void extractMultilingualItems(Metadata metadata, Property property, String pdfBoxBaseline, XMPSchema schema) { - if (schema == null){ - if (pdfBoxBaseline != null && pdfBoxBaseline.length() > 0){ + if (schema == null) { + if (pdfBoxBaseline != null && pdfBoxBaseline.length() > 0) { metadata.add(property, pdfBoxBaseline); } return; } - for (String lang : schema.getLanguagePropertyLanguages(property.getName())){ + for (String lang : schema.getLanguagePropertyLanguages(property.getName())) { String value = schema.getLanguageProperty(property.getName(), lang); if (value != null && pdfBoxBaseline != null - && ! value.equals(pdfBoxBaseline) && value.length() > 0){ + && ! value.equals(pdfBoxBaseline) && value.length() > 0) { metadata.add(property, value); } } - if (pdfBoxBaseline != null && pdfBoxBaseline.length() > 0){ + if (pdfBoxBaseline != null && pdfBoxBaseline.length() > 0) { metadata.add(property, pdfBoxBaseline); } } @@ -327,28 +327,28 @@ public class PDFParser extends AbstractP * @param metadata */ private void extractDublinCoreListItems(Metadata metadata, Property property, - String pdfBoxBaseline, XMPSchemaDublinCore dc){ + String pdfBoxBaseline, XMPSchemaDublinCore dc) { //if no dc, add baseline and return - if (dc == null){ - if (pdfBoxBaseline != null && pdfBoxBaseline.length() > 0){ + if (dc == null) { + if (pdfBoxBaseline != null && pdfBoxBaseline.length() > 0) { addMetadata(metadata, property, pdfBoxBaseline); } return; } List<String> items = getXMPBagOrSeqList(dc, property.getName()); - if (items == null){ - if (pdfBoxBaseline != null && pdfBoxBaseline.length() > 0){ + if (items == null) { + if (pdfBoxBaseline != null && pdfBoxBaseline.length() > 0) { addMetadata(metadata, property, pdfBoxBaseline); } return; } - for (String item : items){ - 
if (pdfBoxBaseline != null && ! item.equals(pdfBoxBaseline)){ + for (String item : items) { + if (pdfBoxBaseline != null && ! item.equals(pdfBoxBaseline)) { addMetadata(metadata, property, item); } } //finally, add the baseline - if (pdfBoxBaseline != null && pdfBoxBaseline.length() > 0){ + if (pdfBoxBaseline != null && pdfBoxBaseline.length() > 0) { addMetadata(metadata, property, pdfBoxBaseline); } } @@ -363,9 +363,9 @@ public class PDFParser extends AbstractP * @param name * @return list of values or null */ - private List<String> getXMPBagOrSeqList(XMPSchema schema, String name){ + private List<String> getXMPBagOrSeqList(XMPSchema schema, String name) { List<String> ret = schema.getBagList(name); - if (ret == null){ + if (ret == null) { ret = schema.getSequenceList(name); } return ret; @@ -406,16 +406,16 @@ public class PDFParser extends AbstractP } } else if(value instanceof COSString) { addMetadata(metadata, name, ((COSString)value).getString()); - } else if (value != null){ + } else if (value != null) { addMetadata(metadata, name, value.toString()); } } - public void setPDFParserConfig(PDFParserConfig config){ + public void setPDFParserConfig(PDFParserConfig config) { this.defaultConfig = config; } - public PDFParserConfig getPDFParserConfig(){ + public PDFParserConfig getPDFParserConfig() { return defaultConfig; } @@ -426,7 +426,7 @@ public class PDFParser extends AbstractP * * @deprecated use {@link #setPDFParserConfig(PDFParserConfig)} */ - public void setUseNonSequentialParser(boolean v){ + public void setUseNonSequentialParser(boolean v) { defaultConfig.setUseNonSequentialParser(v); } @@ -434,7 +434,7 @@ public class PDFParser extends AbstractP * @see #setUseNonSequentialParser(boolean) * @deprecated use {@link #getPDFParserConfig()} */ - public boolean getUseNonSequentialParser(){ + public boolean getUseNonSequentialParser() { return defaultConfig.getUseNonSequentialParser(); } Modified: 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParserConfig.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParserConfig.java?rev=1575116&r1=1575115&r2=1575116&view=diff ============================================================================== --- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParserConfig.java (original) +++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParserConfig.java Fri Mar 7 01:50:34 2014 @@ -63,7 +63,7 @@ public class PDFParserConfig implements //True if acroform content should be extracted private boolean extractAcroFormContent = true; - public PDFParserConfig(){ + public PDFParserConfig() { init(this.getClass().getResourceAsStream("PDFParser.properties")); } @@ -74,25 +74,25 @@ public class PDFParserConfig implements * * @param is */ - public PDFParserConfig(InputStream is){ + public PDFParserConfig(InputStream is) { init(is); } //initializes object and then tries to close inputstream - private void init(InputStream is){ + private void init(InputStream is) { - if (is == null){ + if (is == null) { return; } Properties props = new Properties(); - try{ + try { props.load(is); - } catch (IOException e){ + } catch (IOException e) { } finally { - if (is != null){ + if (is != null) { try{ is.close(); - } catch (IOException e){ + } catch (IOException e) { //swallow } } @@ -219,9 +219,9 @@ public class PDFParserConfig implements if (p == null){ return defaultMissing; } - if (p.toLowerCase().equals("true")){ + if (p.toLowerCase().equals("true")) { return true; - } else if (p.toLowerCase().equals("false")){ + } else if (p.toLowerCase().equals("false")) { return false; } else { return defaultMissing; @@ -275,7 +275,4 @@ public class PDFParserConfig implements + ", useNonSequentialParser=" + useNonSequentialParser + ", extractAcroFormContent=" + extractAcroFormContent + "]"; } - - - } Modified: 
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java?rev=1575116&r1=1575115&r2=1575116&view=diff ============================================================================== --- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java (original) +++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java Fri Mar 7 01:50:34 2014 @@ -478,7 +478,7 @@ public class PDFParserTest extends TikaT ParseContext context = new ParseContext(); String content = ""; InputStream stream = null; - try{ + try { context.set(org.apache.tika.parser.Parser.class, parser); stream = getResourceAsStream("/test-documents/testPDFEmbeddingAndEmbedded.docx"); parser.parse(stream, handler, metadata, context); @@ -540,8 +540,8 @@ public class PDFParserTest extends TikaT //empty for now Set<String> knownContentDiffs = new HashSet<String>(); - for (File f : testDocs.listFiles()){ - if (! f.getName().toLowerCase().endsWith(".pdf")){ + for (File f : testDocs.listFiles()) { + if (! f.getName().toLowerCase().endsWith(".pdf")) { continue; } @@ -552,14 +552,14 @@ public class PDFParserTest extends TikaT Metadata sequentialMetadata = new Metadata(); String sequentialContent = getText(new FileInputStream(f), sequentialParser, context, sequentialMetadata); - if (knownContentDiffs.contains(f.getName())){ + if (knownContentDiffs.contains(f.getName())) { assertFalse(f.getName(), defaultContent.equals(sequentialContent)); } else { assertEquals(f.getName(), defaultContent, sequentialContent); } //skip this one file. 
- if (knownMetadataDiffs.contains(f.getName())){ + if (knownMetadataDiffs.contains(f.getName())) { //turn back on once PDFBOX-1922 is fixed //assertFalse(f.getName(), defaultMetadata.equals(sequentialMetadata)); } else { @@ -624,7 +624,7 @@ public class PDFParserTest extends TikaT */ //TIKA-1226 - public void testSignatureInAcroForm() throws Exception{ + public void testSignatureInAcroForm() throws Exception { //The current test doc does not contain any content in the signature area. //This just tests that a RuntimeException is not thrown. //TODO: find a better test file for this issue. @@ -641,12 +641,12 @@ public class PDFParserTest extends TikaT TrackingHandler tracker = new TrackingHandler(); TikaInputStream tis = null; ContainerExtractor ex = new ParserContainerExtractor(); - try{ + try { tis= TikaInputStream.get( getResourceAsStream("/test-documents/testPDF_childAttachments.pdf")); ex.extract(tis, ex, tracker); } finally { - if (tis != null){ + if (tis != null) { tis.close(); } } @@ -658,7 +658,7 @@ public class PDFParserTest extends TikaT assertEquals(TYPE_DOC, tracker.mediaTypes.get(1)); } - public void testVersions() throws Exception{ + public void testVersions() throws Exception { Map<String, String> dcFormat = new HashMap<String, String>(); dcFormat.put("4.x", "application/pdf; version=1.3"); @@ -686,7 +686,7 @@ public class PDFParserTest extends TikaT pdfExtensionVersions.put("11.x.PDFA-1b", "1.7 Adobe Extension Level 8"); Parser p = new AutoDetectParser(); - for (Map.Entry<String, String> e : dcFormat.entrySet()){ + for (Map.Entry<String, String> e : dcFormat.entrySet()) { String fName = "testPDF_Version."+e.getKey()+".pdf"; InputStream is = PDFParserTest.class.getResourceAsStream( "/test-documents/"+fName); @@ -697,14 +697,14 @@ public class PDFParserTest extends TikaT is.close(); boolean foundDC = false; String[] vals = m.getValues("dc:format"); - for (String v : vals){ - if (v.equals(e.getValue())){ + for (String v : vals) { + if 
(v.equals(e.getValue())) { foundDC = true; } } assertTrue("dc:format ::" + e.getValue(), foundDC); String extensionVersionTruth = pdfExtensionVersions.get(e.getKey()); - if (extensionVersionTruth != null){ + if (extensionVersionTruth != null) { assertEquals("pdf:PDFExtensionVersion :: "+extensionVersionTruth, extensionVersionTruth, m.get("pdf:PDFExtensionVersion")); @@ -722,14 +722,14 @@ public class PDFParserTest extends TikaT p.parse(is, h, m, c); is.close(); Set<String> versions = new HashSet<String>(); - for (String fmt : m.getValues("dc:format")){ + for (String fmt : m.getValues("dc:format")) { versions.add(fmt); } for (String hit : new String[]{ "application/pdf; version=1.7", "application/pdf; version=\"A-1b\"", "application/pdf; version=\"1.7 Adobe Extension Level 8\"" - }){ + }) { assertTrue(hit, versions.contains(hit)); } @@ -748,14 +748,14 @@ public class PDFParserTest extends TikaT p.parse(is, h, m, c); is.close(); - String[] keys = new String[]{ + String[] keys = new String[] { "dc:creator", "meta:author", "creator", "Author" }; - for (String k : keys){ + for (String k : keys) { String[] vals = m.getValues(k); assertEquals("number of authors == 2 for key: "+ k, 2, vals.length); Set<String> set = new HashSet<String>(); @@ -765,6 +765,7 @@ public class PDFParserTest extends TikaT assertTrue("Sample Author 2", set.contains("Sample Author 2")); } } + /** * This is a workaround until PDFBox-1922 is fixed. * The goal is to test for equality but skip the version issue. 
@@ -782,16 +783,16 @@ public class PDFParserTest extends TikaT assertEquals("metadata length: "+fName, thisNames.length, thatMetadata.names().length); - for (String n : thisNames){ + for (String n : thisNames) { //don't pay attention to differences here for now - if (n.equals("pdf:PDFVersion") || n.equals("dc:format")){ + if (n.equals("pdf:PDFVersion") || n.equals("dc:format")) { continue; } - if (thisMetadata.isMultiValued(n) && thatMetadata.isMultiValued(n)){ + if (thisMetadata.isMultiValued(n) && thatMetadata.isMultiValued(n)) { String[] thisValues = thisMetadata.getValues(n); String[] thatValues = thatMetadata.getValues(n); testEqualMetadataValue(fName, thisValues, thatValues); - } else if (! thisMetadata.isMultiValued(n) && ! thatMetadata.isMultiValued(n)){ + } else if (! thisMetadata.isMultiValued(n) && ! thatMetadata.isMultiValued(n)) { assertEquals("unequal multivalued values: " + fName, thisMetadata.get(n), thatMetadata.get(n)); } else { //one is multivalued and the other isn't @@ -800,15 +801,15 @@ public class PDFParserTest extends TikaT } } - private void testEqualMetadataValue(String fName, String[] thisValues, String[] thatValues){ + private void testEqualMetadataValue(String fName, String[] thisValues, String[] thatValues) { assertTrue("null equality of metadata values: "+fName, (thisValues == null && thatValues == null) || (thisValues != null && thatValues != null)); assertEquals("metadata values length: "+fName, thisValues.length, thatValues.length); List<String> list = Arrays.asList(thatValues); - for (String v : thisValues){ - if (! list.contains(v)){ + for (String v : thisValues) { + if (! list.contains(v)) { assertTrue("metadata value; that doesn't contain" + v, false); } }