Author: mattmann
Date: Fri Oct 3 23:53:48 2014
New Revision: 1629339
URL: http://svn.apache.org/r1629339
Log:
Fix for TIKA-1354 Register ForkParser Service in Activator. Contributed by
Michal Hlavac <[email protected]>. This closes #13.
Modified:
tika/trunk/CHANGES.txt
tika/trunk/tika-bundle/src/test/java/org/apache/tika/bundle/BundleIT.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/internal/Activator.java
Modified: tika/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/tika/trunk/CHANGES.txt?rev=1629339&r1=1629338&r2=1629339&view=diff
==============================================================================
--- tika/trunk/CHANGES.txt (original)
+++ tika/trunk/CHANGES.txt Fri Oct 3 23:53:48 2014
@@ -1,4 +1,10 @@
Release 1.7 - Current Development
+
+ * The ForkParser service is now registered in Activator
+ (TIKA-1354).
+
+ * The Rome Library was upgraded to version 1.5 (TIKA-1435).
+
* Add markup for files embedded in PDFs (TIKA-1427).
* Extract files embedded in annotations in PDFS (TIKA-1433).
Modified:
tika/trunk/tika-bundle/src/test/java/org/apache/tika/bundle/BundleIT.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-bundle/src/test/java/org/apache/tika/bundle/BundleIT.java?rev=1629339&r1=1629338&r2=1629339&view=diff
==============================================================================
--- tika/trunk/tika-bundle/src/test/java/org/apache/tika/bundle/BundleIT.java (original)
+++ tika/trunk/tika-bundle/src/test/java/org/apache/tika/bundle/BundleIT.java Fri Oct 3 23:53:48 2014
@@ -23,10 +23,13 @@ import static org.junit.Assert.fail;
import static org.ops4j.pax.exam.CoreOptions.bundle;
import static org.ops4j.pax.exam.CoreOptions.junitBundles;
+import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
+import java.io.StringWriter;
+import java.io.Writer;
import java.net.URISyntaxException;
import java.util.HashSet;
import java.util.List;
@@ -37,7 +40,9 @@ import org.apache.tika.config.ServiceLoa
import org.apache.tika.config.TikaConfig;
import org.apache.tika.detect.DefaultDetector;
import org.apache.tika.detect.Detector;
+import org.apache.tika.fork.ForkParser;
import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.sax.BodyContentHandler;
@@ -55,7 +60,7 @@ import org.xml.sax.ContentHandler;
@RunWith( JUnit4TestRunner.class )
public class BundleIT {
private final File TARGET = new File("target");
-
+
@Configuration
public Option[] configuration() throws IOException, URISyntaxException {
File base = new File(TARGET, "test-bundles");
@@ -64,7 +69,7 @@ public class BundleIT {
bundle(new File(base, "tika-core.jar").toURI().toURL().toString()),
bundle(new File(base, "tika-bundle.jar").toURI().toURL().toString()));
}
-
+
@Test
public void testBundleLoaded(BundleContext bc) throws Exception {
boolean hasCore = false, hasBundle = false;
@@ -81,7 +86,7 @@ public class BundleIT {
assertTrue("Core bundle not found", hasCore);
assertTrue("Bundle bundle not found", hasBundle);
}
-
+
@Test
public void testBundleDetection(BundleContext bc) throws Exception {
Tika tika = new Tika();
@@ -91,16 +96,37 @@ public class BundleIT {
assertEquals("application/pdf", tika.detect("test.pdf"));
}
+ @Test
+ public void testForkParser(BundleContext bc) throws Exception {
+ ForkParser parser = (ForkParser) bc.getService(bc.getServiceReference(ForkParser.class.getName()));
+ ClassLoader classLoader = parser.getClass().getClassLoader();
+ String data = "<!DOCTYPE html>\n<html><body><p>test <span>content</span></p></body></html>";
+ InputStream stream = new ByteArrayInputStream(data.getBytes("UTF-8"));
+ Writer writer = new StringWriter();
+ ContentHandler contentHandler = new BodyContentHandler(writer);
+ Metadata metadata = new Metadata();
+ Detector contentTypeDetector = new DefaultDetector(classLoader);
+ MediaType type = contentTypeDetector.detect(stream, metadata);
+ assertEquals(type.toString(), "text/html");
+ metadata.add(Metadata.CONTENT_TYPE, type.toString());
+ ParseContext parseCtx = new ParseContext();
+ parser.parse(stream, contentHandler, metadata, parseCtx);
+ writer.flush();
+ String content = writer.toString();
+ assertTrue(content.length() > 0);
+ assertEquals("test content", content.trim());
+ }
+
@Ignore // TODO Fix this test
@Test
public void testBundleSimpleText(BundleContext bc) throws Exception {
Tika tika = new Tika();
-
+
// Simple text extraction
String xml = tika.parseToString(new File("pom.xml"));
assertTrue(xml.contains("tika-bundle"));
}
-
+
@Ignore // TODO Fix this test
@Test
public void testBundleDetectors(BundleContext bc) throws Exception {
@@ -108,19 +134,19 @@ public class BundleIT {
// TODO Why is this not finding the detector service resource files?
TestingServiceLoader loader = new TestingServiceLoader();
List<String> rawDetectors = loader.identifyStaticServiceProviders(Detector.class);
-
+
// Check we did get a few, just in case...
assertNotNull(rawDetectors);
assertTrue("Should have several Detector names, found " + rawDetectors.size(),
rawDetectors.size() > 3);
-
+
// Get the classes found within OSGi
DefaultDetector detector = new DefaultDetector();
Set<String> osgiDetectors = new HashSet<String>();
for (Detector d : detector.getDetectors()) {
osgiDetectors.add(d.getClass().getName());
}
-
+
// Check that OSGi didn't miss any
for (String detectorName : rawDetectors) {
if (!osgiDetectors.contains(detectorName)) {
@@ -129,14 +155,14 @@ public class BundleIT {
}
}
}
-
+
@Test
public void testBundleParsers(BundleContext bc) throws Exception {
TikaConfig tika = new TikaConfig();
// TODO Implement as with Detectors
}
-
+
@Ignore // TODO Fix this test
@Test
public void testTikaBundle(BundleContext bc) throws Exception {
Modified:
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/internal/Activator.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/internal/Activator.java?rev=1629339&r1=1629338&r2=1629339&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/internal/Activator.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/internal/Activator.java Fri Oct 3 23:53:48 2014
@@ -20,6 +20,7 @@ import java.util.Properties;
import org.apache.tika.detect.DefaultDetector;
import org.apache.tika.detect.Detector;
+import org.apache.tika.fork.ForkParser;
import org.apache.tika.parser.DefaultParser;
import org.apache.tika.parser.Parser;
import org.osgi.framework.BundleActivator;
@@ -32,20 +33,28 @@ public class Activator implements Bundle
private ServiceRegistration parserService;
+ private ServiceRegistration forkParserService;
+
public void start(BundleContext context) throws Exception {
detectorService = context.registerService(
Detector.class.getName(),
new DefaultDetector(Activator.class.getClassLoader()),
new Properties());
+ Parser parser = new DefaultParser(Activator.class.getClassLoader());
parserService = context.registerService(
Parser.class.getName(),
- new DefaultParser(Activator.class.getClassLoader()),
+ parser,
+ new Properties());
+ forkParserService = context.registerService(
+ ForkParser.class.getName(),
+ new ForkParser(Activator.class.getClassLoader(), parser),
new Properties());
}
public void stop(BundleContext context) throws Exception {
parserService.unregister();
detectorService.unregister();
+ forkParserService.unregister();
}
}