[ 
https://issues.apache.org/jira/browse/TIKA-607?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Joseph Vychtrle updated TIKA-607:
---------------------------------

    Description: 
Hey, I'm trying to get the content of a text file (a MySQL config file).
{code}
        public void testTikaParserUtils() throws Exception {
                String resourceLocation = "files/my.cnf";
                String content = ParseUtils.getStringContent(new 
File(resourceLocation), new TikaConfig());
                System.out.println(content);
        }
{code} 
but I get a NullPointerException, because "parser" is null

{code:title=ParseUtils.java|borderStyle=solid}
public static String getStringContent(
            InputStream stream, TikaConfig config, String mimeType)
            throws TikaException, IOException {
        try {
            Parser parser = config.getParser(MediaType.parse(mimeType));
            ContentHandler handler = new BodyContentHandler();
            parser.parse(stream, handler, new Metadata());
            return handler.toString();
        } catch (SAXException e) {
            throw new TikaException("Unexpected SAX error", e);
        }
    }}
{code} 

java.lang.NullPointerException
        at 
org.apache.tika.utils.ParseUtils.getStringContent(ParseUtils.java:112)
        at 
org.apache.tika.utils.ParseUtils.getStringContent(ParseUtils.java:171)
        at 
org.apache.tika.utils.ParseUtils.getStringContent(ParseUtils.java:189)
        at 
cz.instance.transl.tests.TikaTest.testTikaParserUtils(TikaTest.java:53)
        at 
org.apache.maven.surefire.testng.TestNGExecutor.run(TestNGExecutor.java:73)
        at 
org.apache.maven.surefire.testng.TestNGXmlTestSuite.execute(TestNGXmlTestSuite.java:95)
        at 
org.apache.maven.surefire.testng.TestNGProvider.invoke(TestNGProvider.java:101)
        at 
org.apache.maven.surefire.booter.ProviderFactory$ClassLoaderProxy.invoke(ProviderFactory.java:101)
        at $Proxy0.invoke(Unknown Source)
        at 
org.apache.maven.surefire.booter.SurefireStarter.invokeProvider(SurefireStarter.java:139)
        at 
org.apache.maven.surefire.booter.SurefireStarter.runSuitesInProcess(SurefireStarter.java:82)
        at 
org.apache.maven.surefire.booter.ForkedBooter.main(ForkedBooter.java:81)
... Removed 24 stack frames

  was:
Hey, I was trying Tika with 4 different documents, and reading the InputStream 
always fails with the exceptions you can see in the logs. Reading the content of 
the text file my.cnf also failed.

{code:title=TikaTest.java|borderStyle=solid}
package cz.instance.transl.tests;

import java.io.File;
import java.io.InputStream;

import org.apache.tika.config.TikaConfig;
import org.apache.tika.detect.Detector;
import org.apache.tika.detect.TypeDetector;
import org.apache.tika.language.LanguageIdentifier;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.pdf.PDFParser;
import org.apache.tika.sax.BodyContentHandler;
import org.apache.tika.utils.ParseUtils;
import org.testng.annotations.Test;
import org.xml.sax.ContentHandler;

public class TikaTest {

        @Test
        public void testPDFParser() throws Exception {
                String resourceLocation = 
"file/Designandrealizationofanintranetportal.pdf";
                InputStream input = 
this.getClass().getClassLoader().getResourceAsStream(resourceLocation);
                ContentHandler textHandler = new BodyContentHandler();
                Metadata metadata = new Metadata();
                PDFParser parser = new PDFParser();
                parser.parse(input, textHandler, metadata, new ParseContext());
                input.close();
                System.out.println("Title: " + metadata.get("title"));
                System.out.println("Author: " + metadata.get("Author"));
                System.out.println("format: " + metadata.get("source"));
                System.out.println("content: " + textHandler.toString());
        }

        @Test
        public void testAutoDetectParser() throws Exception {
                InputStream input = 
this.getClass().getResourceAsStream("file/jedna.odt");
                ContentHandler textHandler = new BodyContentHandler();
                Metadata metadata = new Metadata();
                Parser parser = new AutoDetectParser();
                parser.parse(input, textHandler, metadata, new ParseContext());
                input.close();
                System.out.println("Title: " + metadata.get("title"));
                System.out.println("Author: " + metadata.get("Author"));
        }

        @Test
        public void testTikaParserUtils() throws Exception {
                String resourceLocation = "my.cnf";
                String content = ParseUtils.getStringContent(new 
File(resourceLocation), new TikaConfig());
                System.out.println(content);
        }

        @Test
        public void testTypeDetector() throws Exception {
                String resourceLocation = 
"file/Pozadavky_pro_predkladani_diplomovych_praci.doc";
                InputStream input = 
this.getClass().getClassLoader().getResourceAsStream(resourceLocation);
                Detector detector = new TypeDetector();
                MediaType media = detector.detect(input, new Metadata());
                System.out.println("Extact Type: " + media.getType());
                System.out.println("Sub Type: " + media.getBaseType());
        }

        @Test
        public void testLanguageIdentifier() throws Exception {
                String resourceLocation = "file/moje.pdf";
                InputStream input = 
this.getClass().getClassLoader().getResourceAsStream(resourceLocation);
                ContentHandler textHandler = new BodyContentHandler();
                Metadata metadata = new Metadata();
                Parser parser = new AutoDetectParser();
                parser.parse(input, textHandler, metadata, new ParseContext());
                input.close();
                LanguageIdentifier languageIdentifier = new 
LanguageIdentifier(textHandler.toString());
                System.out.println("found language :" + 
languageIdentifier.getLanguage() + " certainity : "
                                + languageIdentifier.isReasonablyCertain());
        }

}
}
{code} 



-------------------------------------------------------
 T E S T S
-------------------------------------------------------
Running TestSuite
[TestRunner] Running the tests in 'DomainObjectFactoryTests' with parallel 
mode:false
[RunInfo] Adding method selector: 
org.testng.internal.XmlMethodSelector@46e45076 priority: 10
[TestClass] Creating TestClass for [ClassImpl cz.instance.transl.tests.TikaTest]
[TestClass] Adding method cz.instance.transl.tests.TikaTest.testTypeDetector() 
on TestClass class cz.instance.transl.tests.TikaTest
[TestClass] Adding method 
cz.instance.transl.tests.TikaTest.testLanguageIdentifier() on TestClass class 
cz.instance.transl.tests.TikaTest
[TestClass] Adding method 
cz.instance.transl.tests.TikaTest.testAutoDetectParser() on TestClass class 
cz.instance.transl.tests.TikaTest
[TestClass] Adding method 
cz.instance.transl.tests.TikaTest.testTikaParserUtils() on TestClass class 
cz.instance.transl.tests.TikaTest
[TestClass] Adding method cz.instance.transl.tests.TikaTest.testPDFParser() on 
TestClass class cz.instance.transl.tests.TikaTest
[XmlMethodSelector] Including method cz.instance.transl.tests.testTypeDetector()
[XmlMethodSelector] Including method 
cz.instance.transl.tests.testLanguageIdentifier()
[XmlMethodSelector] Including method 
cz.instance.transl.tests.testAutoDetectParser()
[XmlMethodSelector] Including method 
cz.instance.transl.tests.testTikaParserUtils()
[XmlMethodSelector] Including method cz.instance.transl.tests.testPDFParser()
[SuiteRunner] Created 1 TestRunners
[TestRunner] Running test DomainObjectFactoryTests on 1  classes,  included 
groups:[] excluded groups:[]
[TestClass] 
======
TESTCLASS: cz.instance.transl.tests.TikaTest
[TestClass] Test        :               
cz.instance.transl.tests.TikaTest.testTypeDetector()
[TestClass] Test        :               
cz.instance.transl.tests.TikaTest.testLanguageIdentifier()
[TestClass] Test        :               
cz.instance.transl.tests.TikaTest.testAutoDetectParser()
[TestClass] Test        :               
cz.instance.transl.tests.TikaTest.testTikaParserUtils()
[TestClass] Test        :               
cz.instance.transl.tests.TikaTest.testPDFParser()
[TestClass] 
======

[TestRunner] Found 5 applicable methods
[TestRunner] WILL BE RUN IN RANDOM ORDER:
[TestRunner]   cz.instance.transl.tests.TikaTest.testAutoDetectParser()
[TestRunner]       on instances
[TestRunner]      cz.instance.transl.tests.TikaTest@1d3c468a
[TestRunner]   cz.instance.transl.tests.TikaTest.testPDFParser()
[TestRunner]       on instances
[TestRunner]      cz.instance.transl.tests.TikaTest@1d3c468a
[TestRunner]   cz.instance.transl.tests.TikaTest.testTikaParserUtils()
[TestRunner]       on instances
[TestRunner]      cz.instance.transl.tests.TikaTest@1d3c468a
[TestRunner]   cz.instance.transl.tests.TikaTest.testTypeDetector()
[TestRunner]       on instances
[TestRunner]      cz.instance.transl.tests.TikaTest@1d3c468a
[TestRunner]   cz.instance.transl.tests.TikaTest.testLanguageIdentifier()
[TestRunner]       on instances
[TestRunner]      cz.instance.transl.tests.TikaTest@1d3c468a
[TestRunner] ===
[Invoker 374961130] Invoking 
cz.instance.transl.tests.TikaTest.testAutoDetectParser
[Invoker 374961130] Invoking cz.instance.transl.tests.TikaTest.testPDFParser
[Invoker 374961130] Invoking 
cz.instance.transl.tests.TikaTest.testTikaParserUtils
/opt/liferay/liferay-new/portal/plugins-trunk/portlets/brokerage
[Invoker 374961130] Invoking cz.instance.transl.tests.TikaTest.testTypeDetector
Extact Type: application
Sub Type: application/octet-stream
[Invoker 374961130] Invoking 
cz.instance.transl.tests.TikaTest.testLanguageIdentifier

*********** INVOKED METHODS

                cz.instance.transl.tests.TikaTest.testAutoDetectParser() 
490489482
                cz.instance.transl.tests.TikaTest.testPDFParser() 490489482
                cz.instance.transl.tests.TikaTest.testTikaParserUtils() 
490489482
                cz.instance.transl.tests.TikaTest.testTypeDetector() 490489482
                cz.instance.transl.tests.TikaTest.testLanguageIdentifier() 
490489482

***********

Creating 
/opt/liferay/liferay-new/portal/plugins-trunk/portlets/brokerage/target/surefire-reports/domain/DomainObjectFactoryTests.html
Creating 
/opt/liferay/liferay-new/portal/plugins-trunk/portlets/brokerage/target/surefire-reports/domain/DomainObjectFactoryTests.xml
PASSED: testTypeDetector
FAILED: testAutoDetectParser
java.io.IOException: Stream closed
        at java.io.BufferedInputStream.getInIfOpen(BufferedInputStream.java:134)
        at java.io.BufferedInputStream.fill(BufferedInputStream.java:218)
        at java.io.BufferedInputStream.read1(BufferedInputStream.java:258)
        at java.io.BufferedInputStream.read(BufferedInputStream.java:317)
        at java.io.FilterInputStream.read(FilterInputStream.java:90)
        at org.apache.tika.mime.MimeTypes.readMagicHeader(MimeTypes.java:303)
        at org.apache.tika.mime.MimeTypes.detect(MimeTypes.java:548)
        at 
org.apache.tika.detect.CompositeDetector.detect(CompositeDetector.java:60)
        at 
org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:126)
        at 
cz.instance.transl.tests.TikaTest.testAutoDetectParser(TikaTest.java:44)
        at 
org.apache.maven.surefire.testng.TestNGExecutor.run(TestNGExecutor.java:73)
        at 
org.apache.maven.surefire.testng.TestNGXmlTestSuite.execute(TestNGXmlTestSuite.java:95)
        at 
org.apache.maven.surefire.testng.TestNGProvider.invoke(TestNGProvider.java:101)
        at 
org.apache.maven.surefire.booter.ProviderFactory$ClassLoaderProxy.invoke(ProviderFactory.java:101)
        at $Proxy0.invoke(Unknown Source)
        at 
org.apache.maven.surefire.booter.SurefireStarter.invokeProvider(SurefireStarter.java:139)
        at 
org.apache.maven.surefire.booter.SurefireStarter.runSuitesInProcess(SurefireStarter.java:82)
        at 
org.apache.maven.surefire.booter.ForkedBooter.main(ForkedBooter.java:81)
... Removed 24 stack frames
FAILED: testPDFParser
java.io.IOException: Stream closed
        at java.io.BufferedInputStream.getInIfOpen(BufferedInputStream.java:134)
        at java.io.BufferedInputStream.read1(BufferedInputStream.java:256)
        at java.io.BufferedInputStream.read(BufferedInputStream.java:317)
        at java.io.BufferedInputStream.fill(BufferedInputStream.java:218)
        at java.io.BufferedInputStream.read(BufferedInputStream.java:237)
        at java.io.FilterInputStream.read(FilterInputStream.java:66)
        at java.io.PushbackInputStream.read(PushbackInputStream.java:122)
        at 
org.apache.pdfbox.io.PushBackInputStream.read(PushBackInputStream.java:84)
        at 
org.apache.pdfbox.io.PushBackInputStream.peek(PushBackInputStream.java:62)
        at 
org.apache.pdfbox.io.PushBackInputStream.isEOF(PushBackInputStream.java:150)
        at org.apache.pdfbox.pdfparser.BaseParser.readLine(BaseParser.java:1248)
        at org.apache.pdfbox.pdfparser.PDFParser.parseHeader(PDFParser.java:283)
        at org.apache.pdfbox.pdfparser.PDFParser.parse(PDFParser.java:155)
        at org.apache.pdfbox.pdmodel.PDDocument.load(PDDocument.java:881)
        at org.apache.pdfbox.pdmodel.PDDocument.load(PDDocument.java:846)
        at org.apache.tika.parser.pdf.PDFParser.parse(PDFParser.java:74)
        at cz.instance.transl.tests.TikaTest.testPDFParser(TikaTest.java:30)
        at 
org.apache.maven.surefire.testng.TestNGExecutor.run(TestNGExecutor.java:73)
        at 
org.apache.maven.surefire.testng.TestNGXmlTestSuite.execute(TestNGXmlTestSuite.java:95)
        at 
org.apache.maven.surefire.testng.TestNGProvider.invoke(TestNGProvider.java:101)
        at 
org.apache.maven.surefire.booter.ProviderFactory$ClassLoaderProxy.invoke(ProviderFactory.java:101)
        at $Proxy0.invoke(Unknown Source)
        at 
org.apache.maven.surefire.booter.SurefireStarter.invokeProvider(SurefireStarter.java:139)
        at 
org.apache.maven.surefire.booter.SurefireStarter.runSuitesInProcess(SurefireStarter.java:82)
        at 
org.apache.maven.surefire.booter.ForkedBooter.main(ForkedBooter.java:81)
... Removed 24 stack frames
FAILED: testTikaParserUtils
java.lang.NullPointerException
        at 
org.apache.tika.utils.ParseUtils.getStringContent(ParseUtils.java:112)
        at 
org.apache.tika.utils.ParseUtils.getStringContent(ParseUtils.java:171)
        at 
org.apache.tika.utils.ParseUtils.getStringContent(ParseUtils.java:189)
        at 
cz.instance.transl.tests.TikaTest.testTikaParserUtils(TikaTest.java:54)
        at 
org.apache.maven.surefire.testng.TestNGExecutor.run(TestNGExecutor.java:73)
        at 
org.apache.maven.surefire.testng.TestNGXmlTestSuite.execute(TestNGXmlTestSuite.java:95)
        at 
org.apache.maven.surefire.testng.TestNGProvider.invoke(TestNGProvider.java:101)
        at 
org.apache.maven.surefire.booter.ProviderFactory$ClassLoaderProxy.invoke(ProviderFactory.java:101)
        at $Proxy0.invoke(Unknown Source)
        at 
org.apache.maven.surefire.booter.SurefireStarter.invokeProvider(SurefireStarter.java:139)
        at 
org.apache.maven.surefire.booter.SurefireStarter.runSuitesInProcess(SurefireStarter.java:82)
        at 
org.apache.maven.surefire.booter.ForkedBooter.main(ForkedBooter.java:81)
... Removed 24 stack frames
FAILED: testLanguageIdentifier
java.io.IOException: Stream closed
        at java.io.BufferedInputStream.getInIfOpen(BufferedInputStream.java:134)
        at java.io.BufferedInputStream.fill(BufferedInputStream.java:218)
        at java.io.BufferedInputStream.read1(BufferedInputStream.java:258)
        at java.io.BufferedInputStream.read(BufferedInputStream.java:317)
        at java.io.FilterInputStream.read(FilterInputStream.java:90)
        at org.apache.tika.mime.MimeTypes.readMagicHeader(MimeTypes.java:303)
        at org.apache.tika.mime.MimeTypes.detect(MimeTypes.java:548)
        at 
org.apache.tika.detect.CompositeDetector.detect(CompositeDetector.java:60)
        at 
org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:126)
        at 
cz.instance.transl.tests.TikaTest.testLanguageIdentifier(TikaTest.java:75)
        at 
org.apache.maven.surefire.testng.TestNGExecutor.run(TestNGExecutor.java:73)
        at 
org.apache.maven.surefire.testng.TestNGXmlTestSuite.execute(TestNGXmlTestSuite.java:95)
        at 
org.apache.maven.surefire.testng.TestNGProvider.invoke(TestNGProvider.java:101)
        at 
org.apache.maven.surefire.booter.ProviderFactory$ClassLoaderProxy.invoke(ProviderFactory.java:101)
        at $Proxy0.invoke(Unknown Source)
        at 
org.apache.maven.surefire.booter.SurefireStarter.invokeProvider(SurefireStarter.java:139)
        at 
org.apache.maven.surefire.booter.SurefireStarter.runSuitesInProcess(SurefireStarter.java:82)
        at 
org.apache.maven.surefire.booter.ForkedBooter.main(ForkedBooter.java:81)
... Removed 24 stack frames

===============================================
    DomainObjectFactoryTests
    Tests run: 5, Failures: 4, Skips: 0
===============================================


===============================================
domain
Total tests run: 5, Failures: 4, Skips: 0
===============================================

Creating 
/opt/liferay/liferay-new/portal/plugins-trunk/portlets/brokerage/target/surefire-reports/domain/toc.html
Creating 
/opt/liferay/liferay-new/portal/plugins-trunk/portlets/brokerage/target/surefire-reports/domain/DomainObjectFactoryTests.properties
Creating 
/opt/liferay/liferay-new/portal/plugins-trunk/portlets/brokerage/target/surefire-reports/domain/index.html
Creating 
/opt/liferay/liferay-new/portal/plugins-trunk/portlets/brokerage/target/surefire-reports/domain/main.html
Creating 
/opt/liferay/liferay-new/portal/plugins-trunk/portlets/brokerage/target/surefire-reports/domain/groups.html
Creating 
/opt/liferay/liferay-new/portal/plugins-trunk/portlets/brokerage/target/surefire-reports/domain/methods.html
Creating 
/opt/liferay/liferay-new/portal/plugins-trunk/portlets/brokerage/target/surefire-reports/domain/methods-alphabetical.html
Creating 
/opt/liferay/liferay-new/portal/plugins-trunk/portlets/brokerage/target/surefire-reports/domain/classes.html
Creating 
/opt/liferay/liferay-new/portal/plugins-trunk/portlets/brokerage/target/surefire-reports/domain/reporter-output.html
Creating 
/opt/liferay/liferay-new/portal/plugins-trunk/portlets/brokerage/target/surefire-reports/domain/methods-not-run.html
Creating 
/opt/liferay/liferay-new/portal/plugins-trunk/portlets/brokerage/target/surefire-reports/domain/testng.xml.html
Creating 
/opt/liferay/liferay-new/portal/plugins-trunk/portlets/brokerage/target/surefire-reports/index.html
Creating 
/opt/liferay/liferay-new/portal/plugins-trunk/portlets/brokerage/target/surefire-reports/testng-failed.xml
Creating 
/opt/liferay/liferay-new/portal/plugins-trunk/portlets/brokerage/target/surefire-reports/domain/testng-failed.xml
Creating 
/opt/liferay/liferay-new/portal/plugins-trunk/portlets/brokerage/target/surefire-reports/testng-results.xml
Tests run: 5, Failures: 4, Errors: 0, Skipped: 0, Time elapsed: 1.724 sec <<< 
FAILURE!

Results :

Failed tests: 
  testAutoDetectParser(cz.instance.transl.tests.TikaTest)
  testPDFParser(cz.instance.transl.tests.TikaTest)
  testTikaParserUtils(cz.instance.transl.tests.TikaTest)
  testLanguageIdentifier(cz.instance.transl.tests.TikaTest)

Tests run: 5, Failures: 4, Errors: 0, Skipped: 0

        Summary: ParseUtils.getStringContent( ) of a text file - parser is null 
  (was: BufferedInputStream.getInIfOpen() - null inputStream )

> ParseUtils.getStringContent( ) of a text file - parser is null 
> ---------------------------------------------------------------
>
>                 Key: TIKA-607
>                 URL: https://issues.apache.org/jira/browse/TIKA-607
>             Project: Tika
>          Issue Type: Bug
>          Components: parser
>    Affects Versions: 0.9
>         Environment: java version "1.6.0_16", linux 64bit
>            Reporter: Joseph Vychtrle
>
> Hey, I'm trying to get content of a text file (mysql config file).
> {code}
>       public void testTikaParserUtils() throws Exception {
>               String resourceLocation = "files/my.cnf";
>               String content = ParseUtils.getStringContent(new 
> File(resourceLocation), new TikaConfig());
>               System.out.println(content);
>       }
> {code} 
> but I get null pointer exception, because "parser" is null
> {code:title=ParseUtils.java|borderStyle=solid}
> public static String getStringContent(
>             InputStream stream, TikaConfig config, String mimeType)
>             throws TikaException, IOException {
>         try {
>             Parser parser = config.getParser(MediaType.parse(mimeType));
>             ContentHandler handler = new BodyContentHandler();
>             parser.parse(stream, handler, new Metadata());
>             return handler.toString();
>         } catch (SAXException e) {
>             throw new TikaException("Unexpected SAX error", e);
>         }
>     }}
> {code} 
> java.lang.NullPointerException
>       at 
> org.apache.tika.utils.ParseUtils.getStringContent(ParseUtils.java:112)
>       at 
> org.apache.tika.utils.ParseUtils.getStringContent(ParseUtils.java:171)
>       at 
> org.apache.tika.utils.ParseUtils.getStringContent(ParseUtils.java:189)
>       at 
> cz.instance.transl.tests.TikaTest.testTikaParserUtils(TikaTest.java:53)
>       at 
> org.apache.maven.surefire.testng.TestNGExecutor.run(TestNGExecutor.java:73)
>       at 
> org.apache.maven.surefire.testng.TestNGXmlTestSuite.execute(TestNGXmlTestSuite.java:95)
>       at 
> org.apache.maven.surefire.testng.TestNGProvider.invoke(TestNGProvider.java:101)
>       at 
> org.apache.maven.surefire.booter.ProviderFactory$ClassLoaderProxy.invoke(ProviderFactory.java:101)
>       at $Proxy0.invoke(Unknown Source)
>       at 
> org.apache.maven.surefire.booter.SurefireStarter.invokeProvider(SurefireStarter.java:139)
>       at 
> org.apache.maven.surefire.booter.SurefireStarter.runSuitesInProcess(SurefireStarter.java:82)
>       at 
> org.apache.maven.surefire.booter.ForkedBooter.main(ForkedBooter.java:81)
> ... Removed 24 stack frames

-- 
This message is automatically generated by JIRA.
-
For more information on JIRA, see: http://www.atlassian.com/software/jira

        

Reply via email to