[
https://issues.apache.org/jira/browse/TIKA-607?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Joseph Vychtrle updated TIKA-607:
---------------------------------
Description:
Hey, I'm trying to get the content of a text file (a MySQL config file).
{code}
public void testTikaParserUtils() throws Exception {
String resourceLocation = "files/my.cnf";
String content = ParseUtils.getStringContent(new
File(resourceLocation), new TikaConfig());
System.out.println(content);
}
{code}
but I get a NullPointerException, because "parser" is null:
{code:title=ParseUtils.java|borderStyle=solid}
public static String getStringContent(
InputStream stream, TikaConfig config, String mimeType)
throws TikaException, IOException {
try {
Parser parser = config.getParser(MediaType.parse(mimeType));
ContentHandler handler = new BodyContentHandler();
parser.parse(stream, handler, new Metadata());
return handler.toString();
} catch (SAXException e) {
throw new TikaException("Unexpected SAX error", e);
}
}}
{code}
java.lang.NullPointerException
at
org.apache.tika.utils.ParseUtils.getStringContent(ParseUtils.java:112)
at
org.apache.tika.utils.ParseUtils.getStringContent(ParseUtils.java:171)
at
org.apache.tika.utils.ParseUtils.getStringContent(ParseUtils.java:189)
at
cz.instance.transl.tests.TikaTest.testTikaParserUtils(TikaTest.java:53)
at
org.apache.maven.surefire.testng.TestNGExecutor.run(TestNGExecutor.java:73)
at
org.apache.maven.surefire.testng.TestNGXmlTestSuite.execute(TestNGXmlTestSuite.java:95)
at
org.apache.maven.surefire.testng.TestNGProvider.invoke(TestNGProvider.java:101)
at
org.apache.maven.surefire.booter.ProviderFactory$ClassLoaderProxy.invoke(ProviderFactory.java:101)
at $Proxy0.invoke(Unknown Source)
at
org.apache.maven.surefire.booter.SurefireStarter.invokeProvider(SurefireStarter.java:139)
at
org.apache.maven.surefire.booter.SurefireStarter.runSuitesInProcess(SurefireStarter.java:82)
at
org.apache.maven.surefire.booter.ForkedBooter.main(ForkedBooter.java:81)
... Removed 24 stack frames
was:
Hey, I was trying Tika with 4 different documents, and reading the InputStream
always ends in the errors shown in the logs below. Reading the content of the
text file my.cnf failed as well.
{code:title=TikaTest.java|borderStyle=solid}
package cz.instance.transl.tests;
import java.io.File;
import java.io.InputStream;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.detect.Detector;
import org.apache.tika.detect.TypeDetector;
import org.apache.tika.language.LanguageIdentifier;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.pdf.PDFParser;
import org.apache.tika.sax.BodyContentHandler;
import org.apache.tika.utils.ParseUtils;
import org.testng.annotations.Test;
import org.xml.sax.ContentHandler;
public class TikaTest {
@Test
public void testPDFParser() throws Exception {
String resourceLocation =
"file/Designandrealizationofanintranetportal.pdf";
InputStream input =
this.getClass().getClassLoader().getResourceAsStream(resourceLocation);
ContentHandler textHandler = new BodyContentHandler();
Metadata metadata = new Metadata();
PDFParser parser = new PDFParser();
parser.parse(input, textHandler, metadata, new ParseContext());
input.close();
System.out.println("Title: " + metadata.get("title"));
System.out.println("Author: " + metadata.get("Author"));
System.out.println("format: " + metadata.get("source"));
System.out.println("content: " + textHandler.toString());
}
@Test
public void testAutoDetectParser() throws Exception {
InputStream input =
this.getClass().getResourceAsStream("file/jedna.odt");
ContentHandler textHandler = new BodyContentHandler();
Metadata metadata = new Metadata();
Parser parser = new AutoDetectParser();
parser.parse(input, textHandler, metadata, new ParseContext());
input.close();
System.out.println("Title: " + metadata.get("title"));
System.out.println("Author: " + metadata.get("Author"));
}
@Test
public void testTikaParserUtils() throws Exception {
String resourceLocation = "my.cnf";
String content = ParseUtils.getStringContent(new
File(resourceLocation), new TikaConfig());
System.out.println(content);
}
@Test
public void testTypeDetector() throws Exception {
String resourceLocation =
"file/Pozadavky_pro_predkladani_diplomovych_praci.doc";
InputStream input =
this.getClass().getClassLoader().getResourceAsStream(resourceLocation);
Detector detector = new TypeDetector();
MediaType media = detector.detect(input, new Metadata());
System.out.println("Extact Type: " + media.getType());
System.out.println("Sub Type: " + media.getBaseType());
}
@Test
public void testLanguageIdentifier() throws Exception {
String resourceLocation = "file/moje.pdf";
InputStream input =
this.getClass().getClassLoader().getResourceAsStream(resourceLocation);
ContentHandler textHandler = new BodyContentHandler();
Metadata metadata = new Metadata();
Parser parser = new AutoDetectParser();
parser.parse(input, textHandler, metadata, new ParseContext());
input.close();
LanguageIdentifier languageIdentifier = new
LanguageIdentifier(textHandler.toString());
System.out.println("found language :" +
languageIdentifier.getLanguage() + " certainity : "
+ languageIdentifier.isReasonablyCertain());
}
}
}
{code}
-------------------------------------------------------
T E S T S
-------------------------------------------------------
Running TestSuite
[TestRunner] Running the tests in 'DomainObjectFactoryTests' with parallel
mode:false
[RunInfo] Adding method selector:
org.testng.internal.XmlMethodSelector@46e45076 priority: 10
[TestClass] Creating TestClass for [ClassImpl cz.instance.transl.tests.TikaTest]
[TestClass] Adding method cz.instance.transl.tests.TikaTest.testTypeDetector()
on TestClass class cz.instance.transl.tests.TikaTest
[TestClass] Adding method
cz.instance.transl.tests.TikaTest.testLanguageIdentifier() on TestClass class
cz.instance.transl.tests.TikaTest
[TestClass] Adding method
cz.instance.transl.tests.TikaTest.testAutoDetectParser() on TestClass class
cz.instance.transl.tests.TikaTest
[TestClass] Adding method
cz.instance.transl.tests.TikaTest.testTikaParserUtils() on TestClass class
cz.instance.transl.tests.TikaTest
[TestClass] Adding method cz.instance.transl.tests.TikaTest.testPDFParser() on
TestClass class cz.instance.transl.tests.TikaTest
[XmlMethodSelector] Including method cz.instance.transl.tests.testTypeDetector()
[XmlMethodSelector] Including method
cz.instance.transl.tests.testLanguageIdentifier()
[XmlMethodSelector] Including method
cz.instance.transl.tests.testAutoDetectParser()
[XmlMethodSelector] Including method
cz.instance.transl.tests.testTikaParserUtils()
[XmlMethodSelector] Including method cz.instance.transl.tests.testPDFParser()
[SuiteRunner] Created 1 TestRunners
[TestRunner] Running test DomainObjectFactoryTests on 1 classes, included
groups:[] excluded groups:[]
[TestClass]
======
TESTCLASS: cz.instance.transl.tests.TikaTest
[TestClass] Test :
cz.instance.transl.tests.TikaTest.testTypeDetector()
[TestClass] Test :
cz.instance.transl.tests.TikaTest.testLanguageIdentifier()
[TestClass] Test :
cz.instance.transl.tests.TikaTest.testAutoDetectParser()
[TestClass] Test :
cz.instance.transl.tests.TikaTest.testTikaParserUtils()
[TestClass] Test :
cz.instance.transl.tests.TikaTest.testPDFParser()
[TestClass]
======
[TestRunner] Found 5 applicable methods
[TestRunner] WILL BE RUN IN RANDOM ORDER:
[TestRunner] cz.instance.transl.tests.TikaTest.testAutoDetectParser()
[TestRunner] on instances
[TestRunner] cz.instance.transl.tests.TikaTest@1d3c468a
[TestRunner] cz.instance.transl.tests.TikaTest.testPDFParser()
[TestRunner] on instances
[TestRunner] cz.instance.transl.tests.TikaTest@1d3c468a
[TestRunner] cz.instance.transl.tests.TikaTest.testTikaParserUtils()
[TestRunner] on instances
[TestRunner] cz.instance.transl.tests.TikaTest@1d3c468a
[TestRunner] cz.instance.transl.tests.TikaTest.testTypeDetector()
[TestRunner] on instances
[TestRunner] cz.instance.transl.tests.TikaTest@1d3c468a
[TestRunner] cz.instance.transl.tests.TikaTest.testLanguageIdentifier()
[TestRunner] on instances
[TestRunner] cz.instance.transl.tests.TikaTest@1d3c468a
[TestRunner] ===
[Invoker 374961130] Invoking
cz.instance.transl.tests.TikaTest.testAutoDetectParser
[Invoker 374961130] Invoking cz.instance.transl.tests.TikaTest.testPDFParser
[Invoker 374961130] Invoking
cz.instance.transl.tests.TikaTest.testTikaParserUtils
/opt/liferay/liferay-new/portal/plugins-trunk/portlets/brokerage
[Invoker 374961130] Invoking cz.instance.transl.tests.TikaTest.testTypeDetector
Extact Type: application
Sub Type: application/octet-stream
[Invoker 374961130] Invoking
cz.instance.transl.tests.TikaTest.testLanguageIdentifier
*********** INVOKED METHODS
cz.instance.transl.tests.TikaTest.testAutoDetectParser()
490489482
cz.instance.transl.tests.TikaTest.testPDFParser() 490489482
cz.instance.transl.tests.TikaTest.testTikaParserUtils()
490489482
cz.instance.transl.tests.TikaTest.testTypeDetector() 490489482
cz.instance.transl.tests.TikaTest.testLanguageIdentifier()
490489482
***********
Creating
/opt/liferay/liferay-new/portal/plugins-trunk/portlets/brokerage/target/surefire-reports/domain/DomainObjectFactoryTests.html
Creating
/opt/liferay/liferay-new/portal/plugins-trunk/portlets/brokerage/target/surefire-reports/domain/DomainObjectFactoryTests.xml
PASSED: testTypeDetector
FAILED: testAutoDetectParser
java.io.IOException: Stream closed
at java.io.BufferedInputStream.getInIfOpen(BufferedInputStream.java:134)
at java.io.BufferedInputStream.fill(BufferedInputStream.java:218)
at java.io.BufferedInputStream.read1(BufferedInputStream.java:258)
at java.io.BufferedInputStream.read(BufferedInputStream.java:317)
at java.io.FilterInputStream.read(FilterInputStream.java:90)
at org.apache.tika.mime.MimeTypes.readMagicHeader(MimeTypes.java:303)
at org.apache.tika.mime.MimeTypes.detect(MimeTypes.java:548)
at
org.apache.tika.detect.CompositeDetector.detect(CompositeDetector.java:60)
at
org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:126)
at
cz.instance.transl.tests.TikaTest.testAutoDetectParser(TikaTest.java:44)
at
org.apache.maven.surefire.testng.TestNGExecutor.run(TestNGExecutor.java:73)
at
org.apache.maven.surefire.testng.TestNGXmlTestSuite.execute(TestNGXmlTestSuite.java:95)
at
org.apache.maven.surefire.testng.TestNGProvider.invoke(TestNGProvider.java:101)
at
org.apache.maven.surefire.booter.ProviderFactory$ClassLoaderProxy.invoke(ProviderFactory.java:101)
at $Proxy0.invoke(Unknown Source)
at
org.apache.maven.surefire.booter.SurefireStarter.invokeProvider(SurefireStarter.java:139)
at
org.apache.maven.surefire.booter.SurefireStarter.runSuitesInProcess(SurefireStarter.java:82)
at
org.apache.maven.surefire.booter.ForkedBooter.main(ForkedBooter.java:81)
... Removed 24 stack frames
FAILED: testPDFParser
java.io.IOException: Stream closed
at java.io.BufferedInputStream.getInIfOpen(BufferedInputStream.java:134)
at java.io.BufferedInputStream.read1(BufferedInputStream.java:256)
at java.io.BufferedInputStream.read(BufferedInputStream.java:317)
at java.io.BufferedInputStream.fill(BufferedInputStream.java:218)
at java.io.BufferedInputStream.read(BufferedInputStream.java:237)
at java.io.FilterInputStream.read(FilterInputStream.java:66)
at java.io.PushbackInputStream.read(PushbackInputStream.java:122)
at
org.apache.pdfbox.io.PushBackInputStream.read(PushBackInputStream.java:84)
at
org.apache.pdfbox.io.PushBackInputStream.peek(PushBackInputStream.java:62)
at
org.apache.pdfbox.io.PushBackInputStream.isEOF(PushBackInputStream.java:150)
at org.apache.pdfbox.pdfparser.BaseParser.readLine(BaseParser.java:1248)
at org.apache.pdfbox.pdfparser.PDFParser.parseHeader(PDFParser.java:283)
at org.apache.pdfbox.pdfparser.PDFParser.parse(PDFParser.java:155)
at org.apache.pdfbox.pdmodel.PDDocument.load(PDDocument.java:881)
at org.apache.pdfbox.pdmodel.PDDocument.load(PDDocument.java:846)
at org.apache.tika.parser.pdf.PDFParser.parse(PDFParser.java:74)
at cz.instance.transl.tests.TikaTest.testPDFParser(TikaTest.java:30)
at
org.apache.maven.surefire.testng.TestNGExecutor.run(TestNGExecutor.java:73)
at
org.apache.maven.surefire.testng.TestNGXmlTestSuite.execute(TestNGXmlTestSuite.java:95)
at
org.apache.maven.surefire.testng.TestNGProvider.invoke(TestNGProvider.java:101)
at
org.apache.maven.surefire.booter.ProviderFactory$ClassLoaderProxy.invoke(ProviderFactory.java:101)
at $Proxy0.invoke(Unknown Source)
at
org.apache.maven.surefire.booter.SurefireStarter.invokeProvider(SurefireStarter.java:139)
at
org.apache.maven.surefire.booter.SurefireStarter.runSuitesInProcess(SurefireStarter.java:82)
at
org.apache.maven.surefire.booter.ForkedBooter.main(ForkedBooter.java:81)
... Removed 24 stack frames
FAILED: testTikaParserUtils
java.lang.NullPointerException
at
org.apache.tika.utils.ParseUtils.getStringContent(ParseUtils.java:112)
at
org.apache.tika.utils.ParseUtils.getStringContent(ParseUtils.java:171)
at
org.apache.tika.utils.ParseUtils.getStringContent(ParseUtils.java:189)
at
cz.instance.transl.tests.TikaTest.testTikaParserUtils(TikaTest.java:54)
at
org.apache.maven.surefire.testng.TestNGExecutor.run(TestNGExecutor.java:73)
at
org.apache.maven.surefire.testng.TestNGXmlTestSuite.execute(TestNGXmlTestSuite.java:95)
at
org.apache.maven.surefire.testng.TestNGProvider.invoke(TestNGProvider.java:101)
at
org.apache.maven.surefire.booter.ProviderFactory$ClassLoaderProxy.invoke(ProviderFactory.java:101)
at $Proxy0.invoke(Unknown Source)
at
org.apache.maven.surefire.booter.SurefireStarter.invokeProvider(SurefireStarter.java:139)
at
org.apache.maven.surefire.booter.SurefireStarter.runSuitesInProcess(SurefireStarter.java:82)
at
org.apache.maven.surefire.booter.ForkedBooter.main(ForkedBooter.java:81)
... Removed 24 stack frames
FAILED: testLanguageIdentifier
java.io.IOException: Stream closed
at java.io.BufferedInputStream.getInIfOpen(BufferedInputStream.java:134)
at java.io.BufferedInputStream.fill(BufferedInputStream.java:218)
at java.io.BufferedInputStream.read1(BufferedInputStream.java:258)
at java.io.BufferedInputStream.read(BufferedInputStream.java:317)
at java.io.FilterInputStream.read(FilterInputStream.java:90)
at org.apache.tika.mime.MimeTypes.readMagicHeader(MimeTypes.java:303)
at org.apache.tika.mime.MimeTypes.detect(MimeTypes.java:548)
at
org.apache.tika.detect.CompositeDetector.detect(CompositeDetector.java:60)
at
org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:126)
at
cz.instance.transl.tests.TikaTest.testLanguageIdentifier(TikaTest.java:75)
at
org.apache.maven.surefire.testng.TestNGExecutor.run(TestNGExecutor.java:73)
at
org.apache.maven.surefire.testng.TestNGXmlTestSuite.execute(TestNGXmlTestSuite.java:95)
at
org.apache.maven.surefire.testng.TestNGProvider.invoke(TestNGProvider.java:101)
at
org.apache.maven.surefire.booter.ProviderFactory$ClassLoaderProxy.invoke(ProviderFactory.java:101)
at $Proxy0.invoke(Unknown Source)
at
org.apache.maven.surefire.booter.SurefireStarter.invokeProvider(SurefireStarter.java:139)
at
org.apache.maven.surefire.booter.SurefireStarter.runSuitesInProcess(SurefireStarter.java:82)
at
org.apache.maven.surefire.booter.ForkedBooter.main(ForkedBooter.java:81)
... Removed 24 stack frames
===============================================
DomainObjectFactoryTests
Tests run: 5, Failures: 4, Skips: 0
===============================================
===============================================
domain
Total tests run: 5, Failures: 4, Skips: 0
===============================================
Creating
/opt/liferay/liferay-new/portal/plugins-trunk/portlets/brokerage/target/surefire-reports/domain/toc.html
Creating
/opt/liferay/liferay-new/portal/plugins-trunk/portlets/brokerage/target/surefire-reports/domain/DomainObjectFactoryTests.properties
Creating
/opt/liferay/liferay-new/portal/plugins-trunk/portlets/brokerage/target/surefire-reports/domain/index.html
Creating
/opt/liferay/liferay-new/portal/plugins-trunk/portlets/brokerage/target/surefire-reports/domain/main.html
Creating
/opt/liferay/liferay-new/portal/plugins-trunk/portlets/brokerage/target/surefire-reports/domain/groups.html
Creating
/opt/liferay/liferay-new/portal/plugins-trunk/portlets/brokerage/target/surefire-reports/domain/methods.html
Creating
/opt/liferay/liferay-new/portal/plugins-trunk/portlets/brokerage/target/surefire-reports/domain/methods-alphabetical.html
Creating
/opt/liferay/liferay-new/portal/plugins-trunk/portlets/brokerage/target/surefire-reports/domain/classes.html
Creating
/opt/liferay/liferay-new/portal/plugins-trunk/portlets/brokerage/target/surefire-reports/domain/reporter-output.html
Creating
/opt/liferay/liferay-new/portal/plugins-trunk/portlets/brokerage/target/surefire-reports/domain/methods-not-run.html
Creating
/opt/liferay/liferay-new/portal/plugins-trunk/portlets/brokerage/target/surefire-reports/domain/testng.xml.html
Creating
/opt/liferay/liferay-new/portal/plugins-trunk/portlets/brokerage/target/surefire-reports/index.html
Creating
/opt/liferay/liferay-new/portal/plugins-trunk/portlets/brokerage/target/surefire-reports/testng-failed.xml
Creating
/opt/liferay/liferay-new/portal/plugins-trunk/portlets/brokerage/target/surefire-reports/domain/testng-failed.xml
Creating
/opt/liferay/liferay-new/portal/plugins-trunk/portlets/brokerage/target/surefire-reports/testng-results.xml
Tests run: 5, Failures: 4, Errors: 0, Skipped: 0, Time elapsed: 1.724 sec <<<
FAILURE!
Results :
Failed tests:
testAutoDetectParser(cz.instance.transl.tests.TikaTest)
testPDFParser(cz.instance.transl.tests.TikaTest)
testTikaParserUtils(cz.instance.transl.tests.TikaTest)
testLanguageIdentifier(cz.instance.transl.tests.TikaTest)
Tests run: 5, Failures: 4, Errors: 0, Skipped: 0
Summary: ParseUtils.getStringContent( ) of a text file - parser is null
(was: BufferedInputStream.getInIfOpen() - null inputStream )
> ParseUtils.getStringContent( ) of a text file - parser is null
> ---------------------------------------------------------------
>
> Key: TIKA-607
> URL: https://issues.apache.org/jira/browse/TIKA-607
> Project: Tika
> Issue Type: Bug
> Components: parser
> Affects Versions: 0.9
> Environment: java version "1.6.0_16", linux 64bit
> Reporter: Joseph Vychtrle
>
> Hey, I'm trying to get the content of a text file (a MySQL config file).
> {code}
> public void testTikaParserUtils() throws Exception {
> String resourceLocation = "files/my.cnf";
> String content = ParseUtils.getStringContent(new
> File(resourceLocation), new TikaConfig());
> System.out.println(content);
> }
> {code}
> but I get a NullPointerException, because "parser" is null:
> {code:title=ParseUtils.java|borderStyle=solid}
> public static String getStringContent(
> InputStream stream, TikaConfig config, String mimeType)
> throws TikaException, IOException {
> try {
> Parser parser = config.getParser(MediaType.parse(mimeType));
> ContentHandler handler = new BodyContentHandler();
> parser.parse(stream, handler, new Metadata());
> return handler.toString();
> } catch (SAXException e) {
> throw new TikaException("Unexpected SAX error", e);
> }
> }}
> {code}
> java.lang.NullPointerException
> at
> org.apache.tika.utils.ParseUtils.getStringContent(ParseUtils.java:112)
> at
> org.apache.tika.utils.ParseUtils.getStringContent(ParseUtils.java:171)
> at
> org.apache.tika.utils.ParseUtils.getStringContent(ParseUtils.java:189)
> at
> cz.instance.transl.tests.TikaTest.testTikaParserUtils(TikaTest.java:53)
> at
> org.apache.maven.surefire.testng.TestNGExecutor.run(TestNGExecutor.java:73)
> at
> org.apache.maven.surefire.testng.TestNGXmlTestSuite.execute(TestNGXmlTestSuite.java:95)
> at
> org.apache.maven.surefire.testng.TestNGProvider.invoke(TestNGProvider.java:101)
> at
> org.apache.maven.surefire.booter.ProviderFactory$ClassLoaderProxy.invoke(ProviderFactory.java:101)
> at $Proxy0.invoke(Unknown Source)
> at
> org.apache.maven.surefire.booter.SurefireStarter.invokeProvider(SurefireStarter.java:139)
> at
> org.apache.maven.surefire.booter.SurefireStarter.runSuitesInProcess(SurefireStarter.java:82)
> at
> org.apache.maven.surefire.booter.ForkedBooter.main(ForkedBooter.java:81)
> ... Removed 24 stack frames
--
This message is automatically generated by JIRA.
-
For more information on JIRA, see: http://www.atlassian.com/software/jira