Repository: tika
Updated Branches:
  refs/heads/master b47f162a5 -> dcaeccbab


TIKA-1513 -- update mime type according to Nick Burch's recommendation, other 
small import clean up


Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/dcaeccba
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/dcaeccba
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/dcaeccba

Branch: refs/heads/master
Commit: dcaeccbab69519811e0cdf349873ce2b51e6ca10
Parents: b47f162
Author: tballison <[email protected]>
Authored: Thu May 26 10:00:30 2016 -0400
Committer: tballison <[email protected]>
Committed: Thu May 26 10:00:30 2016 -0400

----------------------------------------------------------------------
 .../org/apache/tika/parser/dbf/DBFParser.java   | 16 +++---
 .../org/apache/tika/parser/dbf/DBFReader.java   | 58 +++++++++++++-------
 .../apache/tika/parser/dbf/DBFParserTest.java   | 24 +++-----
 3 files changed, 54 insertions(+), 44 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tika/blob/dcaeccba/tika-parsers/src/main/java/org/apache/tika/parser/dbf/DBFParser.java
----------------------------------------------------------------------
diff --git 
a/tika-parsers/src/main/java/org/apache/tika/parser/dbf/DBFParser.java 
b/tika-parsers/src/main/java/org/apache/tika/parser/dbf/DBFParser.java
index 6b77d3d..d4f2bda 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/dbf/DBFParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/dbf/DBFParser.java
@@ -16,9 +16,6 @@
  */
 package org.apache.tika.parser.dbf;
 
-import org.apache.fontbox.encoding.Encoding;
-import org.apache.tika.detect.AutoDetectReader;
-import org.apache.tika.detect.Detector;
 import org.apache.tika.detect.EncodingDetector;
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.io.TikaInputStream;
@@ -38,7 +35,11 @@ import java.io.IOException;
 import java.io.InputStream;
 import java.nio.charset.Charset;
 import java.nio.charset.StandardCharsets;
-import java.util.*;
+import java.util.Calendar;
+import java.util.Collections;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Set;
 
 /**
  * This is a Tika wrapper around the DBFReader.
@@ -50,7 +51,6 @@ import java.util.*;
  */
 public class DBFParser extends AbstractParser {
 
-    public static final String DBF_VERSION_MIME_ATTRIBUTE = "dbf_version";
     private static final int ROWS_TO_BUFFER_FOR_CHARSET_DETECTION = 10;
     private static final int MAX_CHARS_FOR_CHARSET_DETECTION = 20000;
     private static final Charset DEFAULT_CHARSET = StandardCharsets.ISO_8859_1;
@@ -68,9 +68,7 @@ public class DBFParser extends AbstractParser {
                       ParseContext context) throws IOException, SAXException, 
TikaException {
         DBFReader reader = DBFReader.open(stream);
         DBFFileHeader header = reader.getHeader();
-
-        metadata.set(Metadata.CONTENT_TYPE, "application/x-dbf; "+
-                DBF_VERSION_MIME_ATTRIBUTE+"="+header.getVersion().getName());
+        metadata.set(Metadata.CONTENT_TYPE, 
header.getVersion().getFullMimeString());
 
         //insert metadata here
         Calendar lastModified = header.getLastModified();
@@ -82,7 +80,7 @@ public class DBFParser extends AbstractParser {
         List<DBFRow> firstRows = new LinkedList<>();
         DBFRow row = reader.next();
         int i = 0;
-        while(row != null && i++ < ROWS_TO_BUFFER_FOR_CHARSET_DETECTION) {
+        while (row != null && i++ < ROWS_TO_BUFFER_FOR_CHARSET_DETECTION) {
             firstRows.add(row.deepCopy());
             row = reader.next();
         }

http://git-wip-us.apache.org/repos/asf/tika/blob/dcaeccba/tika-parsers/src/main/java/org/apache/tika/parser/dbf/DBFReader.java
----------------------------------------------------------------------
diff --git 
a/tika-parsers/src/main/java/org/apache/tika/parser/dbf/DBFReader.java 
b/tika-parsers/src/main/java/org/apache/tika/parser/dbf/DBFReader.java
index 961244a..b70898b 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/dbf/DBFReader.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/dbf/DBFReader.java
@@ -33,7 +33,7 @@ import java.util.concurrent.ConcurrentHashMap;
  * is mutable and will change as the reader iterates over new rows.
  * <p>
 * This is based on: <a href="http://web.archive.org/web/20150323061445/http://ulisse.elettra.trieste.it/services/doc/dbase/DBFstruct.htm">
- *     http://ulisse.elettra.trieste.it/services/doc/dbase/DBFstruct.htm</a>
+ * http://ulisse.elettra.trieste.it/services/doc/dbase/DBFstruct.htm</a>
  * <p>
  * This is designed to separate out Tika-specific code so that it can
  * be copied/pasted as a standalone if desired.
@@ -47,38 +47,56 @@ class DBFReader {
 
     enum Version {
 
-        FOXBASE(0x02, "FoxBASE"),
-        FOXBASE_PLUS(0x03, "FoxBASE_plus"),
-        VISUAL_FOXPRO(0x30, "Visual_FoxPro"),
-        VISUAL_FOXPRO_AUTOINCREMENT(0x31, "Visual_FoxPro_autoincrement"),
-        VISUAL_FOXPRO_VAR(0x32, "Visual_FoxPro_with_Varchar_or_Varbinary"),
-        DBASE_IV_SQL_TABLE(0x43, "dBASE_IV_SQL_table"),
-        DBASE_IV_SQL_SYSTEM(0x63, "dBASE_IV_SQL_system"),
-        FOX_BASE_PLUS_WITH_MEMO(0x83, "FoxBASE_plus_with_memo"),
-        DBASE_IV_WITH_MEMO(0x8B, "dBASE_IV_with_memo"),
-        DBASE_IV_SQL_TABLE_WITH_MEMO(0xCB, "dBASE_IV_SQL_table_with_memo"),
-        FOXPRO_2x_WITH_MEMO(0xF5, "FoxPro_2.x_with_memo"),
-        HIPER_SIZ_WITH_SMT_MEMO(0xE5, "HiPer-Siz_with_SMT_memo"),
-        FOXBASE2(0xFB, "FoxBASE");
+        FOXBASE(0x02, "FoxBASE", ""),
+        FOXBASE_PLUS(0x03, "FoxBASE_plus", ""),
+        VISUAL_FOXPRO(0x30, "Visual_FoxPro", ""),
+        VISUAL_FOXPRO_AUTOINCREMENT(0x31, "Visual_FoxPro", "autoincrement"),
+        VISUAL_FOXPRO_VAR(0x32, "Visual_FoxPro", "Varchar_or_Varbinary"),
+        DBASE_IV_SQL_TABLE(0x43, "dBASE_IV_SQL", "table"),
+        DBASE_IV_SQL_SYSTEM(0x63, "dBASE_IV_SQL", "system"),
+        FOX_BASE_PLUS_WITH_MEMO(0x83, "FoxBASE_plus", "memo"),
+        DBASE_IV_WITH_MEMO(0x8B, "dBASE_IV", "memo"),
+        DBASE_IV_SQL_TABLE_WITH_MEMO(0xCB, "dBASE_IV_SQL", "table_with_memo"),
+        FOXPRO_2x_WITH_MEMO(0xF5, "FoxPro_2.x", "memo"),
+        HIPER_SIZ_WITH_SMT_MEMO(0xE5, "HiPer-Siz", "SMT_memo"),
+        FOXBASE2(0xFB, "FoxBASE", "");
 
         private final int id;
-        private final String name;
+        private final String format;
+        private final String type;
 
-        Version(int id, String name) {
+        Version(int id, String format, String type) {
             this.id = id;
-            this.name = name;
+            this.format = format;
+            this.type = type;
         }
 
         int getId() {
             return id;
         }
 
-        String getName() {
-            return name;
+        String getFormat() {
+            return format;
         }
-    };
+
+        String getType() {
+            return type;
+        }
+
+        String getFullMimeString() {
+            StringBuilder sb = new StringBuilder();
+            sb.append("application/x-dbf; ").append("format=").append(getFormat());
+            if (!"".equals(type)) {
+                sb.append("; type=").append(getType());
+            }
+            return sb.toString();
+        }
+    }
+
+    ;
 
     private static final Map<Integer, Version> VERSION_MAP = new 
ConcurrentHashMap<>();
+
     static {
         for (Version version : Version.values()) {
             VERSION_MAP.put(version.id, version);

http://git-wip-us.apache.org/repos/asf/tika/blob/dcaeccba/tika-parsers/src/test/java/org/apache/tika/parser/dbf/DBFParserTest.java
----------------------------------------------------------------------
diff --git 
a/tika-parsers/src/test/java/org/apache/tika/parser/dbf/DBFParserTest.java 
b/tika-parsers/src/test/java/org/apache/tika/parser/dbf/DBFParserTest.java
index 9d15e44..3ab043b 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/dbf/DBFParserTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/dbf/DBFParserTest.java
@@ -22,17 +22,12 @@ import org.apache.tika.exception.TikaException;
 import org.apache.tika.io.TikaInputStream;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.metadata.TikaCoreProperties;
-import org.apache.tika.mime.MediaType;
 import org.apache.tika.parser.AutoDetectParser;
-import org.apache.tika.parser.ParseContext;
 import org.apache.tika.parser.Parser;
-import org.apache.tika.sax.BodyContentHandler;
 import org.junit.Test;
 
 import java.io.ByteArrayInputStream;
 import java.io.ByteArrayOutputStream;
-import java.io.File;
-import java.io.FileInputStream;
 import java.io.IOException;
 import java.io.InputStream;
 
@@ -44,7 +39,7 @@ public class DBFParserTest extends TikaTest {
     @Test
     public void testBasic() throws Exception {
         XMLResult r = getXML("testDBF.dbf");
-        assertEquals("application/x-dbf; dbf_version=FoxBASE_plus", 
r.metadata.get(Metadata.CONTENT_TYPE));
+        assertEquals(DBFReader.Version.FOXBASE_PLUS.getFullMimeString(), 
r.metadata.get(Metadata.CONTENT_TYPE));
         assertEquals("2016-05-24T00:00:00Z", 
r.metadata.get(TikaCoreProperties.MODIFIED));
         assertEquals("UTF-8", r.metadata.get(Metadata.CONTENT_ENCODING));
 
@@ -69,7 +64,7 @@ public class DBFParserTest extends TikaTest {
     @Test
     public void testGB18030Encoded() throws Exception {
         XMLResult r = getXML("testDBF_gb18030.dbf");
-        assertEquals("application/x-dbf; dbf_version=FoxBASE_plus", 
r.metadata.get(Metadata.CONTENT_TYPE));
+        assertEquals(DBFReader.Version.FOXBASE_PLUS.getFullMimeString(), 
r.metadata.get(Metadata.CONTENT_TYPE));
         assertContains("虽然该", r.xml);
     }
 
@@ -80,8 +75,8 @@ public class DBFParserTest extends TikaTest {
         for (int i = 1; i < 129; i++) {
             try {
                 XMLResult r = getXML(truncate("testDBF.dbf", i), p, new 
Metadata());
-                fail("Should have thrown exception for truncation in header: "+i);
-            } catch (IOException|TikaException e) {
+                fail("Should have thrown exception for truncation in header: " + i);
+            } catch (IOException | TikaException e) {
                 //ok -- expected
             } catch (Throwable e) {
                 fail("Should only throw IOExceptions or TikaExceptions");
@@ -91,8 +86,8 @@ public class DBFParserTest extends TikaTest {
         for (int i = 129; i < 204; i++) {
             try {
                 XMLResult r = getXML(truncate("testDBF.dbf", i), p, new 
Metadata());
-            } catch (IOException|TikaException e) {
-                fail("Shouldn't have thrown exception for truncation while reading cells: "+i);
+            } catch (IOException | TikaException e) {
+                fail("Shouldn't have thrown exception for truncation while reading cells: " + i);
                 e.printStackTrace();
             }
         }
@@ -134,10 +129,9 @@ public class DBFParserTest extends TikaTest {
 
         for (DBFReader.Version version : DBFReader.Version.values()) {
             //this cast happens to work because of the range of possible values
-            bytes[0] = (byte)version.getId();
+            bytes[0] = (byte) version.getId();
             XMLResult r = getXML(TikaInputStream.get(bytes), new 
AutoDetectParser(), new Metadata());
-            assertEquals("application/x-dbf; "+
-                    
DBFParser.DBF_VERSION_MIME_ATTRIBUTE+"="+version.getName(), 
r.metadata.get(Metadata.CONTENT_TYPE));
+            assertEquals(version.getFullMimeString(), 
r.metadata.get(Metadata.CONTENT_TYPE));
         }
     }
 
@@ -156,7 +150,7 @@ commented out until we get permission to add the test file
 
     InputStream truncate(String testFileName, int length) throws IOException {
         byte[] bytes = new byte[length];
-        try (InputStream is = 
getResourceAsStream("/test-documents/"+testFileName)) {
+        try (InputStream is = getResourceAsStream("/test-documents/" + 
testFileName)) {
             IOUtils.readFully(is, bytes);
         }
         return new ByteArrayInputStream(bytes);

Reply via email to