Author: mattmann
Date: Tue Oct 2 08:35:23 2007
New Revision: 581280
URL: http://svn.apache.org/viewvc?rev=581280&view=rev
Log:
- fix for TIKA-34 (contributed by K. Bennett)
Modified:
incubator/tika/trunk/CHANGES.txt
incubator/tika/trunk/pom.xml
incubator/tika/trunk/src/main/java/org/apache/tika/config/TikaConfig.java
incubator/tika/trunk/src/test/java/org/apache/tika/TestParsers.java
Modified: incubator/tika/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/incubator/tika/trunk/CHANGES.txt?rev=581280&r1=581279&r2=581280&view=diff
==============================================================================
--- incubator/tika/trunk/CHANGES.txt (original)
+++ incubator/tika/trunk/CHANGES.txt Tue Oct 2 08:35:23 2007
@@ -60,3 +60,6 @@
27. TIKA-35 - Extract MsOffice properties (ridabenjelloun)
28. TIKA-39 - Excel parsing improvements (siren & ridabenjelloun)
+
+29. TIKA-34 - Provide a method that will return a default configuration
+ (TikaConfig) (K. Bennett & mattmann)
Modified: incubator/tika/trunk/pom.xml
URL:
http://svn.apache.org/viewvc/incubator/tika/trunk/pom.xml?rev=581280&r1=581279&r2=581280&view=diff
==============================================================================
--- incubator/tika/trunk/pom.xml (original)
+++ incubator/tika/trunk/pom.xml Tue Oct 2 08:35:23 2007
@@ -234,6 +234,8 @@
<include name="*"/>
</fileset>
</copy>
+ <copy todir="${project.build.outputDirectory}/org/apache/tika"
+ file="${basedir}/src/main/resources/tika-config.xml"/>
</tasks>
</configuration>
<goals>
Modified:
incubator/tika/trunk/src/main/java/org/apache/tika/config/TikaConfig.java
URL:
http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/java/org/apache/tika/config/TikaConfig.java?rev=581280&r1=581279&r2=581280&view=diff
==============================================================================
--- incubator/tika/trunk/src/main/java/org/apache/tika/config/TikaConfig.java (original)
+++ incubator/tika/trunk/src/main/java/org/apache/tika/config/TikaConfig.java Tue Oct 2 08:35:23 2007
@@ -27,6 +27,7 @@
//TIKA imports
import org.apache.tika.mime.MimeTypes;
import org.apache.tika.mime.MimeUtils;
+import org.apache.tika.utils.Utils;
//JDOM imports
import org.jdom.Document;
@@ -39,6 +40,9 @@
* Parse xml config file.
*/
public class TikaConfig {
+
+ public static final String DEFAULT_CONFIG_LOCATION =
+ "/org/apache/tika/tika-config.xml";
private final Map<String, ParserConfig> configs =
new HashMap<String, ParserConfig>();
@@ -85,6 +89,22 @@
public MimeTypes getMimeRepository(){
return mimeTypeRepo.getRepository();
+ }
+
+ /**
+ * Provides a default configuration (TikaConfig). Currently creates a
+ * new instance each time it's called; we may be able to have it
+ * return a shared instance once it is completely immutable.
+ *
+ * @return
+ * @throws IOException
+ * @throws JDOMException
+ */
+ public static TikaConfig getDefaultConfig()
+ throws IOException, JDOMException {
+
+ return new TikaConfig(
+ Utils.class.getResourceAsStream(DEFAULT_CONFIG_LOCATION));
}
}
Modified: incubator/tika/trunk/src/test/java/org/apache/tika/TestParsers.java
URL:
http://svn.apache.org/viewvc/incubator/tika/trunk/src/test/java/org/apache/tika/TestParsers.java?rev=581280&r1=581279&r2=581280&view=diff
==============================================================================
--- incubator/tika/trunk/src/test/java/org/apache/tika/TestParsers.java (original)
+++ incubator/tika/trunk/src/test/java/org/apache/tika/TestParsers.java Tue Oct 2 08:35:23 2007
@@ -61,21 +61,30 @@
*/
// FIXME for now, fix filenames according to Maven testing layout
- final String tikaConfigFilename = "target/classes/tika-config.xml";
- final String log4jPropertiesFilename = "target/classes/log4j/log4j.properties";
- testFilesBaseDir = new File("src/test/resources/test-documents");
+
+ // The file below should be the default configuration for the test of
+ // getDefaultConfig() to be legitimate.
+ final String tikaConfigFilename
+ = "target/classes/org/apache/tika/tika-config.xml";
+
+ final String log4jPropertiesFilename
+ = "target/classes/log4j/log4j.properties";
+
+ testFilesBaseDir = new File("src/test/resources/test-documents");
tc = new TikaConfig(tikaConfigFilename);
TikaLogger.setLoggerConfigFile(log4jPropertiesFilename);
-
}
public void testPDFExtraction() throws Exception {
File file = getTestFile("testPDF.pdf");
String s1 = ParseUtils.getStringContent(file, tc);
String s2 = ParseUtils.getStringContent(file, tc, "application/pdf");
- assertEquals(s1, s2);
+ String s3 = ParseUtils.getStringContent(file,
+ TikaConfig.getDefaultConfig());
+ assertEquals(s1, s2);
+ assertEquals(s1, s3);
}
public void testTXTExtraction() throws Exception {
@@ -139,7 +148,11 @@
}
public void testEXCELExtraction() throws Exception {
- final String expected = "Numbers and their Squares Number Square 1.0 1.0 2.0 4.0 3.0 9.0 4.0 16.0 5.0 25.0 6.0 36.0 7.0 49.0 8.0 64.0 9.0 81.0 10.0 100.0 11.0 121.0 12.0 144.0 13.0 169.0 14.0 196.0 15.0 225.0 Written and saved in Microsoft Excel X for Mac Service Release 1.";
+ final String expected = "Numbers and their Squares Number Square 1.0 "
+ + "1.0 2.0 4.0 3.0 9.0 4.0 16.0 5.0 25.0 6.0 36.0 7.0 49.0 8.0 "
+ + "64.0 9.0 81.0 10.0 100.0 11.0 121.0 12.0 144.0 13.0 169.0 "
+ + "14.0 196.0 15.0 225.0 Written and saved in Microsoft Excel "
+ + "X for Mac Service Release 1.";
File file = getTestFile("testEXCEL.xls");
String s1 = ParseUtils.getStringContent(file, tc);
String s2 = ParseUtils.getStringContent(file, tc,