Repository: tika
Updated Branches:
  refs/heads/2.x 8d24e07fb -> e855648af


TIKA-1513 -- update mime type according to Nick Burch's recommendation, other 
small import clean up


Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/15ec358c
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/15ec358c
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/15ec358c

Branch: refs/heads/2.x
Commit: 15ec358c44867adc44ab0431960d565b3d8a3e2c
Parents: 8d24e07
Author: tballison <[email protected]>
Authored: Thu May 26 10:04:55 2016 -0400
Committer: tballison <[email protected]>
Committed: Thu May 26 10:04:55 2016 -0400

----------------------------------------------------------------------
 .../org/apache/tika/parser/dbf/DBFParser.java   |  7 +--
 .../org/apache/tika/parser/dbf/DBFReader.java   | 59 +++++++++++++-------
 .../apache/tika/parser/dbf/DBFParserTest.java   |  7 +--
 3 files changed, 43 insertions(+), 30 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tika/blob/15ec358c/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/dbf/DBFParser.java
----------------------------------------------------------------------
diff --git 
a/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/dbf/DBFParser.java
 
b/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/dbf/DBFParser.java
index 7200da3..f8fa1a2 100644
--- 
a/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/dbf/DBFParser.java
+++ 
b/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/dbf/DBFParser.java
@@ -49,7 +49,6 @@ import java.util.Set;
  */
 public class DBFParser extends AbstractParser {
 
-    public static final String DBF_VERSION_MIME_ATTRIBUTE = "dbf_version";
     private static final int ROWS_TO_BUFFER_FOR_CHARSET_DETECTION = 10;
     private static final int MAX_CHARS_FOR_CHARSET_DETECTION = 20000;
     private static final Charset DEFAULT_CHARSET = StandardCharsets.ISO_8859_1;
@@ -67,9 +66,7 @@ public class DBFParser extends AbstractParser {
                       ParseContext context) throws IOException, SAXException, 
TikaException {
         DBFReader reader = DBFReader.open(stream);
         DBFFileHeader header = reader.getHeader();
-
-        metadata.set(Metadata.CONTENT_TYPE, "application/x-dbf; "+
-                DBF_VERSION_MIME_ATTRIBUTE+"="+header.getVersion().getName());
+        metadata.set(Metadata.CONTENT_TYPE, 
header.getVersion().getFullMimeString());
 
         //insert metadata here
         Calendar lastModified = header.getLastModified();
@@ -81,7 +78,7 @@ public class DBFParser extends AbstractParser {
         List<DBFRow> firstRows = new LinkedList<>();
         DBFRow row = reader.next();
         int i = 0;
-        while(row != null && i++ < ROWS_TO_BUFFER_FOR_CHARSET_DETECTION) {
+        while (row != null && i++ < ROWS_TO_BUFFER_FOR_CHARSET_DETECTION) {
             firstRows.add(row.deepCopy());
             row = reader.next();
         }

http://git-wip-us.apache.org/repos/asf/tika/blob/15ec358c/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/dbf/DBFReader.java
----------------------------------------------------------------------
diff --git 
a/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/dbf/DBFReader.java
 
b/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/dbf/DBFReader.java
index 961244a..674e238 100644
--- 
a/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/dbf/DBFReader.java
+++ 
b/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/dbf/DBFReader.java
@@ -33,7 +33,7 @@ import java.util.concurrent.ConcurrentHashMap;
  * is mutable and will change as the reader iterates over new rows.
  * <p>
  * This is based on: <a 
href="http://web.archive.org/web/20150323061445/http://ulisse.elettra.trieste.it/services/doc/dbase/DBFstruct.htm">
- *     http://ulisse.elettra.trieste.it/services/doc/dbase/DBFstruct.htm</a>
+ * http://ulisse.elettra.trieste.it/services/doc/dbase/DBFstruct.htm</a>
  * <p>
  * This is designed to separate out Tika-specific code so that it can
  * be copied/pasted as a standalone if desired.
@@ -46,39 +46,56 @@ class DBFReader {
 
 
     enum Version {
-
-        FOXBASE(0x02, "FoxBASE"),
-        FOXBASE_PLUS(0x03, "FoxBASE_plus"),
-        VISUAL_FOXPRO(0x30, "Visual_FoxPro"),
-        VISUAL_FOXPRO_AUTOINCREMENT(0x31, "Visual_FoxPro_autoincrement"),
-        VISUAL_FOXPRO_VAR(0x32, "Visual_FoxPro_with_Varchar_or_Varbinary"),
-        DBASE_IV_SQL_TABLE(0x43, "dBASE_IV_SQL_table"),
-        DBASE_IV_SQL_SYSTEM(0x63, "dBASE_IV_SQL_system"),
-        FOX_BASE_PLUS_WITH_MEMO(0x83, "FoxBASE_plus_with_memo"),
-        DBASE_IV_WITH_MEMO(0x8B, "dBASE_IV_with_memo"),
-        DBASE_IV_SQL_TABLE_WITH_MEMO(0xCB, "dBASE_IV_SQL_table_with_memo"),
-        FOXPRO_2x_WITH_MEMO(0xF5, "FoxPro_2.x_with_memo"),
-        HIPER_SIZ_WITH_SMT_MEMO(0xE5, "HiPer-Siz_with_SMT_memo"),
-        FOXBASE2(0xFB, "FoxBASE");
+        FOXBASE(0x02, "FoxBASE", ""),
+        FOXBASE_PLUS(0x03, "FoxBASE_plus", ""),
+        VISUAL_FOXPRO(0x30, "Visual_FoxPro", ""),
+        VISUAL_FOXPRO_AUTOINCREMENT(0x31, "Visual_FoxPro", "autoincrement"),
+        VISUAL_FOXPRO_VAR(0x32, "Visual_FoxPro", "Varchar_or_Varbinary"),
+        DBASE_IV_SQL_TABLE(0x43, "dBASE_IV_SQL", "table"),
+        DBASE_IV_SQL_SYSTEM(0x63, "dBASE_IV_SQL", "system"),
+        FOX_BASE_PLUS_WITH_MEMO(0x83, "FoxBASE_plus", "memo"),
+        DBASE_IV_WITH_MEMO(0x8B, "dBASE_IV", "memo"),
+        DBASE_IV_SQL_TABLE_WITH_MEMO(0xCB, "dBASE_IV_SQL", "table_with_memo"),
+        FOXPRO_2x_WITH_MEMO(0xF5, "FoxPro_2.x", "memo"),
+        HIPER_SIZ_WITH_SMT_MEMO(0xE5, "HiPer-Siz", "SMT_memo"),
+        FOXBASE2(0xFB, "FoxBASE", "");
 
         private final int id;
-        private final String name;
+        private final String format;
+        private final String type;
 
-        Version(int id, String name) {
+        Version(int id, String format, String type) {
             this.id = id;
-            this.name = name;
+            this.format = format;
+            this.type = type;
         }
 
         int getId() {
             return id;
         }
 
-        String getName() {
-            return name;
+        String getFormat() {
+            return format;
+        }
+
+        String getType() {
+            return type;
         }
-    };
+
+        String getFullMimeString() {
+            StringBuilder sb = new StringBuilder();
+            sb.append("application/x-dbf; ").append("format=").append(getFormat());
+            if (!"".equals(type)) {
+                sb.append("; type=").append(getType());
+            }
+            return sb.toString();
+        }
+    }
+
+    ;
 
     private static final Map<Integer, Version> VERSION_MAP = new 
ConcurrentHashMap<>();
+
     static {
         for (Version version : Version.values()) {
             VERSION_MAP.put(version.id, version);

http://git-wip-us.apache.org/repos/asf/tika/blob/15ec358c/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/dbf/DBFParserTest.java
----------------------------------------------------------------------
diff --git 
a/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/dbf/DBFParserTest.java
 
b/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/dbf/DBFParserTest.java
index a531c55..202c8c8 100644
--- 
a/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/dbf/DBFParserTest.java
+++ 
b/tika-parser-modules/tika-parser-office-module/src/test/java/org/apache/tika/parser/dbf/DBFParserTest.java
@@ -39,7 +39,7 @@ public class DBFParserTest extends TikaTest {
     @Test
     public void testBasic() throws Exception {
         XMLResult r = getXML("testDBF.dbf");
-        assertEquals("application/x-dbf; dbf_version=FoxBASE_plus", 
r.metadata.get(Metadata.CONTENT_TYPE));
+        assertEquals(DBFReader.Version.FOXBASE_PLUS.getFullMimeString(), 
r.metadata.get(Metadata.CONTENT_TYPE));
         assertEquals("2016-05-24T00:00:00Z", 
r.metadata.get(TikaCoreProperties.MODIFIED));
         assertEquals("UTF-8", r.metadata.get(Metadata.CONTENT_ENCODING));
 
@@ -64,7 +64,7 @@ public class DBFParserTest extends TikaTest {
     @Test
     public void testGB18030Encoded() throws Exception {
         XMLResult r = getXML("testDBF_gb18030.dbf");
-        assertEquals("application/x-dbf; dbf_version=FoxBASE_plus", 
r.metadata.get(Metadata.CONTENT_TYPE));
+        assertEquals(DBFReader.Version.FOXBASE_PLUS.getFullMimeString(), 
r.metadata.get(Metadata.CONTENT_TYPE));
         assertContains("虽然该", r.xml);
     }
 
@@ -131,8 +131,7 @@ public class DBFParserTest extends TikaTest {
             //this cast happens to work because of the range of possible values
             bytes[0] = (byte)version.getId();
             XMLResult r = getXML(TikaInputStream.get(bytes), new 
AutoDetectParser(), new Metadata());
-            assertEquals("application/x-dbf; "+
-                    
DBFParser.DBF_VERSION_MIME_ATTRIBUTE+"="+version.getName(), 
r.metadata.get(Metadata.CONTENT_TYPE));
+            assertEquals(version.getFullMimeString(), 
r.metadata.get(Metadata.CONTENT_TYPE));
         }
     }
 

Reply via email to