svn commit: r1659545 - in /tika/trunk/tika-parsers/src: main/java/org/apache/tika/parser/jdbc/ test/java/org/apache/tika/parser/jdbc/

tallison Fri, 13 Feb 2015 04:44:13 -0800

Author: tallison
Date: Fri Feb 13 12:43:56 2015
New Revision: 1659545

URL: http://svn.apache.org/r1659545
Log:
TIKA-1511, with new files added...doh


Added:
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jdbc/
    
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jdbc/AbstractDBParser.java
    
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jdbc/JDBCTableReader.java
    
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jdbc/SQLite3DBParser.java
    
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jdbc/SQLite3Parser.java
    
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jdbc/SQLite3TableReader.java
    tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/jdbc/
    
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/jdbc/SQLite3ParserTest.java

Added: 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jdbc/AbstractDBParser.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jdbc/AbstractDBParser.java?rev=1659545&view=auto
==============================================================================
--- 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jdbc/AbstractDBParser.java
 (added)
+++ 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jdbc/AbstractDBParser.java
 Fri Feb 13 12:43:56 2015
@@ -0,0 +1,188 @@
+package org.apache.tika.parser.jdbc;
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.SQLException;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.extractor.EmbeddedDocumentExtractor;
+import org.apache.tika.extractor.ParsingEmbeddedDocumentExtractor;
+import org.apache.tika.io.IOExceptionWithCause;
+import org.apache.tika.metadata.Database;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.AbstractParser;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.sax.XHTMLContentHandler;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+/**
+ * Abstract class that handles iterating through tables within a database.
+ * <p>
+ * {@link #parse} opens a JDBC connection via {@link #getConnection}, adds every
+ * table name to the parent metadata, and writes each table as an XHTML
+ * <code>table</code> element.  Rows are written by the {@link JDBCTableReader}
+ * returned from {@link #getTableReader}.
+ * <p>
+ * The open connection is held in an instance field for the duration of
+ * {@link #parse}, so a single instance must not be used for concurrent parses.
+ */
+abstract class AbstractDBParser extends AbstractParser {
+
+    //connection opened by parse() and released by close()
+    private Connection connection;
+
+    /**
+     * This base class does not advertise any media types.
+     *
+     * @param context parse context (unused)
+     * @return <code>null</code> (always)
+     */
+    @Override
+    public Set<MediaType> getSupportedTypes(ParseContext context) {
+        return null;
+    }
+
+    /**
+     * Opens the database behind <code>stream</code> and writes every table to
+     * <code>handler</code> as XHTML.
+     * <p>
+     * Once the connection has been opened, {@link #close()} is always called,
+     * whether or not reading the tables or writing the document succeeds.
+     *
+     * @param stream stream that contains the database
+     * @param handler handler that receives the XHTML events
+     * @param metadata parent metadata; each table name is added under {@link Database#TABLE_NAME}
+     * @param context parse context
+     * @throws IOException if the database cannot be read; SQLExceptions raised while
+     *         listing tables are wrapped in an {@link IOExceptionWithCause}
+     * @throws SAXException if the handler fails
+     * @throws TikaException if the connection cannot be set up
+     */
+    @Override
+    public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context)
+            throws IOException, SAXException, TikaException {
+        connection = getConnection(stream, metadata, context);
+        try {
+            List<String> tableNames;
+            try {
+                tableNames = getTableNames(connection, metadata, context);
+            } catch (SQLException e) {
+                throw new IOExceptionWithCause(e);
+            }
+            for (String tableName : tableNames) {
+                //add table names to parent metadata
+                metadata.add(Database.TABLE_NAME, tableName);
+            }
+            XHTMLContentHandler xHandler = new XHTMLContentHandler(handler, metadata);
+            xHandler.startDocument();
+            try {
+                for (String tableName : tableNames) {
+                    writeTable(getTableReader(connection, tableName, context), xHandler, context);
+                }
+            } finally {
+                xHandler.endDocument();
+            }
+        } finally {
+            //runs even if endDocument() or anything before it failed,
+            //so the connection is never left open
+            try {
+                close();
+            } catch (SQLException e) {
+                //swallow: closing is best effort
+            }
+        }
+    }
+
+    /**
+     * Writes one table: a <code>thead</code> with the column headers followed
+     * by a <code>tbody</code> holding every row the reader returns.
+     */
+    private void writeTable(JDBCTableReader tableReader, XHTMLContentHandler xHandler, ParseContext context)
+            throws IOException, SAXException {
+        xHandler.startElement("table", "name", tableReader.getTableName());
+        xHandler.startElement("thead");
+        xHandler.startElement("tr");
+        for (String header : tableReader.getHeaders()) {
+            xHandler.startElement("th");
+            xHandler.characters(header);
+            xHandler.endElement("th");
+        }
+        xHandler.endElement("tr");
+        xHandler.endElement("thead");
+        xHandler.startElement("tbody");
+        while (tableReader.nextRow(xHandler, context)) {
+            //no-op: nextRow writes the row itself
+        }
+        xHandler.endElement("tbody");
+        xHandler.endElement("table");
+    }
+
+    /**
+     * Returns the {@link EmbeddedDocumentExtractor} registered in the context, or a
+     * new {@link ParsingEmbeddedDocumentExtractor} if none is registered.
+     *
+     * @param context parse context to look in
+     * @return extractor to use for embedded documents
+     */
+    protected static EmbeddedDocumentExtractor getEmbeddedDocumentExtractor(ParseContext context) {
+       return context.get(EmbeddedDocumentExtractor.class,
+               new ParsingEmbeddedDocumentExtractor(context));
+    }
+
+    /**
+     * Override this for any special handling of closing the connection.
+     * <p>
+     * This implementation closes the connection opened by {@link #parse}, and
+     * does nothing if there is no connection.
+     *
+     * @throws java.sql.SQLException
+     * @throws java.io.IOException
+     */
+    protected void close() throws SQLException, IOException {
+        if (connection != null) {
+            connection.close();
+        }
+    }
+
+    /**
+     * Override this for special configuration of the connection, such as limiting
+     * the number of rows to be held in memory.
+     * <p>
+     * This implementation loads the class named by {@link #getJDBCClassName()} and
+     * asks the {@link DriverManager} for a connection to {@link #getConnectionString}.
+     *
+     * @param stream stream to use
+     * @param metadata metadata that could be used in parameterizing the connection
+     * @param context parsecontext that could be used in parameterizing the connection
+     * @return connection
+     * @throws java.io.IOException if the connection cannot be opened
+     * @throws org.apache.tika.exception.TikaException if the JDBC driver class cannot be found
+     */
+    protected Connection getConnection(InputStream stream, Metadata metadata, ParseContext context)
+            throws IOException, TikaException {
+        String connectionString = getConnectionString(stream, metadata, context);
+
+        try {
+            Class.forName(getJDBCClassName());
+        } catch (ClassNotFoundException e) {
+            //keep the cause so the missing driver shows up in the stack trace
+            throw new TikaException(e.getMessage(), e);
+        }
+        try {
+            return DriverManager.getConnection(connectionString);
+        } catch (SQLException e) {
+            throw new IOExceptionWithCause(e);
+        }
+    }
+
+    /**
+     * Implement for db specific connection information, e.g. "jdbc:sqlite:/docs/mydb.db"
+     * <p>
+     * Include any optimization settings, user name, password, etc.
+     * <p>
+     * @param stream stream for processing
+     * @param metadata metadata might be useful in determining connection info
+     * @param parseContext context to use to help create connectionString
+     * @return connection string to be used by {@link #getConnection}.
+     * @throws java.io.IOException
+     */
+    abstract protected String getConnectionString(InputStream stream,
+                                               Metadata metadata, ParseContext parseContext) throws IOException;
+
+    /**
+     * JDBC class name, e.g. org.sqlite.JDBC
+     * @return jdbc class name
+     */
+    abstract protected String getJDBCClassName();
+
+    /**
+     *
+     * Returns the names of the tables to process
+     *
+     * @param connection Connection to use to make the sql call(s) to get the names of the tables
+     * @param metadata Metadata to use (potentially) in decision about which tables to extract
+     * @param context ParseContext to use (potentially) in decision about which tables to extract
+     * @return names of the tables to process, in the order they should be written
+     * @throws java.sql.SQLException
+     */
+    abstract protected List<String> getTableNames(Connection connection, Metadata metadata,
+                                                  ParseContext context) throws SQLException;
+
+    /**
+     * Given a connection and a table name, return the JDBCTableReader for this db.
+     *
+     * @param connection connection to read the table through
+     * @param tableName name of the table to read
+     * @param parseContext parse context handed on to the reader
+     * @return reader for the given table
+     */
+    abstract protected JDBCTableReader getTableReader(Connection connection, String tableName, ParseContext parseContext);
+
+}

Added: 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jdbc/JDBCTableReader.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jdbc/JDBCTableReader.java?rev=1659545&view=auto
==============================================================================
--- 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jdbc/JDBCTableReader.java
 (added)
+++ 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jdbc/JDBCTableReader.java
 Fri Feb 13 12:43:56 2015
@@ -0,0 +1,300 @@
+package org.apache.tika.parser.jdbc;
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.sql.Blob;
+import java.sql.Clob;
+import java.sql.Connection;
+import java.sql.ResultSet;
+import java.sql.ResultSetMetaData;
+import java.sql.SQLException;
+import java.sql.Statement;
+import java.sql.Types;
+import java.util.LinkedList;
+import java.util.List;
+
+import org.apache.tika.config.TikaConfig;
+import org.apache.tika.detect.Detector;
+import org.apache.tika.extractor.EmbeddedDocumentExtractor;
+import org.apache.tika.io.FilenameUtils;
+import org.apache.tika.io.IOExceptionWithCause;
+import org.apache.tika.io.IOUtils;
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.Database;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaMetadataKeys;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.mime.MimeType;
+import org.apache.tika.mime.MimeTypeException;
+import org.apache.tika.mime.MimeTypes;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.sax.XHTMLContentHandler;
+import org.xml.sax.Attributes;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+import org.xml.sax.helpers.AttributesImpl;
+
+/**
+ * General base class to iterate through rows of a JDBC table.
+ * <p>
+ * The table is read with <code>SELECT * from tableName</code>; the query is run
+ * lazily on the first call to {@link #getHeaders()} or {@link #nextRow}.
+ * BLOB and CLOB cells are handed to the {@link EmbeddedDocumentExtractor} as
+ * embedded documents; all other cells are written as character data.
+ */
+class JDBCTableReader {
+
+    private final static Attributes EMPTY_ATTRIBUTES = new AttributesImpl();
+    private final Connection connection;
+    private final String tableName;
+    //maximum number of characters read from a single CLOB cell
+    int maxClobLength = 1000000;
+    //open result set over the table; null until reset() has run
+    ResultSet results = null;
+    //number of rows written so far; doubles as the 0-based number of the current row
+    int rows = 0;
+    private TikaConfig tikaConfig = null;
+    private Detector detector = null;
+    private MimeTypes mimeTypes = null;
+
+    /**
+     * @param connection connection to read the table through
+     * @param tableName name of the table to read
+     * @param context parse context; a {@link TikaConfig} registered here is used
+     *                for detection, otherwise the default config is loaded on demand
+     */
+    public JDBCTableReader(Connection connection, String tableName, ParseContext context) {
+        this.connection = connection;
+        this.tableName = tableName;
+        this.tikaConfig = context.get(TikaConfig.class);
+    }
+
+    /**
+     * Writes the next row of the table as a <code>tr</code> element with one
+     * <code>td</code> per column.
+     *
+     * @param handler handler to write to
+     * @param context parse context used for embedded BLOB/CLOB documents
+     * @return <code>true</code> if a row was written, <code>false</code> if the table is exhausted
+     * @throws IOException on read failure; SQLExceptions are wrapped in {@link IOExceptionWithCause}
+     * @throws SAXException if the handler fails
+     */
+    public boolean nextRow(ContentHandler handler, ParseContext context) throws IOException, SAXException {
+        //lazy initialization
+        if (results == null) {
+            reset();
+        }
+        try {
+            if (!results.next()) {
+                return false;
+            }
+        } catch (SQLException e) {
+            throw new IOExceptionWithCause(e);
+        }
+        try {
+            ResultSetMetaData meta = results.getMetaData();
+            handler.startElement(XHTMLContentHandler.XHTML, "tr", "tr", EMPTY_ATTRIBUTES);
+            for (int i = 1; i <= meta.getColumnCount(); i++) {
+                handler.startElement(XHTMLContentHandler.XHTML, "td", "td", EMPTY_ATTRIBUTES);
+                handleCell(meta, i, handler, context);
+                handler.endElement(XHTMLContentHandler.XHTML, "td", "td");
+            }
+            handler.endElement(XHTMLContentHandler.XHTML, "tr", "tr");
+        } catch (SQLException e) {
+            throw new IOExceptionWithCause(e);
+        }
+        rows++;
+        return true;
+    }
+
+    /**
+     * Dispatches column <code>i</code> of the current row to the handler for its
+     * {@link Types JDBC type}; unlisted types are written via <code>getString</code>.
+     */
+    private void handleCell(ResultSetMetaData rsmd, int i, ContentHandler handler, ParseContext context)
+            throws SQLException, IOException, SAXException {
+        switch (rsmd.getColumnType(i)) {
+            case Types.BLOB:
+                handleBlob(tableName, rsmd.getColumnName(i), rows, results, i, handler, context);
+                break;
+            case Types.CLOB:
+                handleClob(tableName, rsmd.getColumnName(i), rows, results, i, handler, context);
+                break;
+            case Types.BOOLEAN:
+                handleBoolean(results.getBoolean(i), handler);
+                break;
+            case Types.DATE:
+                handleDate(results, i, handler);
+                break;
+            case Types.TIMESTAMP:
+                handleTimeStamp(results, i, handler);
+                break;
+            case Types.INTEGER:
+                handleInteger(rsmd.getColumnTypeName(i), results, i, handler);
+                break;
+            case Types.FLOAT:
+                //this is necessary to handle rounding issues in presentation
+                //Should we just use getString(i)?
+                addAllCharacters(Float.toString(results.getFloat(i)), handler);
+                break;
+            case Types.DOUBLE:
+                addAllCharacters(Double.toString(results.getDouble(i)), handler);
+                break;
+            default:
+                addAllCharacters(results.getString(i), handler);
+                break;
+        }
+    }
+
+    /**
+     * Returns the column names of the table, in column order.
+     *
+     * @return column names
+     * @throws IOException on read failure; SQLExceptions are wrapped in {@link IOExceptionWithCause}
+     */
+    public List<String> getHeaders() throws IOException {
+        List<String> headers = new LinkedList<String>();
+        //lazy initialization
+        if (results == null) {
+            reset();
+        }
+        try {
+            ResultSetMetaData meta = results.getMetaData();
+            for (int i = 1; i <= meta.getColumnCount(); i++) {
+                headers.add(meta.getColumnName(i));
+            }
+        } catch (SQLException e) {
+            throw new IOExceptionWithCause(e);
+        }
+        return headers;
+    }
+
+    /**
+     * Writes an INTEGER cell; <code>columnTypeName</code> is passed along so that
+     * subclasses can treat driver specific integer types differently.
+     */
+    protected void handleInteger(String columnTypeName, ResultSet rs, int columnIndex, ContentHandler handler)
+            throws SQLException, SAXException {
+        addAllCharacters(Integer.toString(rs.getInt(columnIndex)), handler);
+    }
+
+    private void handleBoolean(boolean aBoolean, ContentHandler handler) throws SAXException {
+        addAllCharacters(Boolean.toString(aBoolean), handler);
+    }
+
+
+    /**
+     * Hands a CLOB cell to the embedded document extractor as UTF-8 plain text.
+     * At most {@link #maxClobLength} characters are read; the metadata records the
+     * full CLOB length and whether the content was truncated.  A SQL NULL cell
+     * is skipped.
+     */
+    protected void handleClob(String tableName, String columnName, int rowNum,
+                              ResultSet resultSet, int columnIndex,
+                              ContentHandler handler, ParseContext context) throws SQLException, IOException, SAXException {
+        Clob clob = resultSet.getClob(columnIndex);
+        if (clob == null) {
+            //SQL NULL: there is no document to embed
+            return;
+        }
+        long clobLength = clob.length();
+        boolean truncated = clobLength > maxClobLength;
+        int readSize = truncated ? maxClobLength : (int) clobLength;
+
+        //is there a more efficient way to go from a Reader to an InputStream?
+        //JDBC LOB positions are 1-based; skip the call entirely for an empty CLOB
+        String s = (readSize == 0) ? "" : clob.getSubString(1, readSize);
+        byte[] bytes = s.getBytes("UTF-8");
+
+        Metadata m = new Metadata();
+        m.set(Database.TABLE_NAME, tableName);
+        m.set(Database.COLUMN_NAME, columnName);
+        m.set(Database.PREFIX + "ROW_NUM", Integer.toString(rowNum));
+        m.set(Database.PREFIX + "IS_CLOB", "true");
+        m.set(Database.PREFIX + "CLOB_LENGTH", Long.toString(clobLength));
+        m.set(Database.PREFIX + "IS_CLOB_TRUNCATED", Boolean.toString(truncated));
+        m.set(Metadata.CONTENT_TYPE, "text/plain; charset=UTF-8");
+        //length of the stream actually handed over, in bytes rather than characters
+        m.set(Metadata.CONTENT_LENGTH, Integer.toString(bytes.length));
+        m.set(TikaMetadataKeys.RESOURCE_NAME_KEY,
+                //just in case something screwy is going on with the column name
+                FilenameUtils.normalize(FilenameUtils.getName(columnName + "_" + rowNum + ".txt")));
+
+        EmbeddedDocumentExtractor ex = AbstractDBParser.getEmbeddedDocumentExtractor(context);
+        ex.parseEmbedded(new ByteArrayInputStream(bytes), handler, m, true);
+    }
+
+    /**
+     * Hands a BLOB cell to the embedded document extractor, wrapped in a
+     * <code>span</code> that records the column name and row number.  The media
+     * type is detected from the content and used to pick a file extension for
+     * the resource name.  A cell with no content (SQL NULL) is skipped.
+     */
+    protected void handleBlob(String tableName, String columnName, int rowNum, ResultSet resultSet, int columnIndex,
+                              ContentHandler handler, ParseContext context) throws SQLException, IOException, SAXException {
+        Metadata m = new Metadata();
+        m.set(Database.TABLE_NAME, tableName);
+        m.set(Database.COLUMN_NAME, columnName);
+        m.set(Database.PREFIX + "ROW_NUM", Integer.toString(rowNum));
+        m.set(Database.PREFIX + "IS_BLOB", "true");
+        EmbeddedDocumentExtractor ex = AbstractDBParser.getEmbeddedDocumentExtractor(context);
+
+        //no Blob is fetched here; getInputStreamFromBlob decides how to read the column
+        InputStream raw = getInputStreamFromBlob(resultSet, columnIndex, null, m);
+        if (raw == null) {
+            return;
+        }
+        InputStream is = TikaInputStream.get(raw);
+        try {
+            AttributesImpl attrs = new AttributesImpl();
+            attrs.addAttribute("", "type", "type", "CDATA", "blob");
+            attrs.addAttribute("", "column_name", "column_name", "CDATA", columnName);
+            attrs.addAttribute("", "row_number", "row_number", "CDATA", Integer.toString(rowNum));
+            handler.startElement("", "span", "span", attrs);
+            MediaType mediaType = getDetector().detect(is, new Metadata());
+            String extension = "";
+            try {
+                MimeType mimeType = getMimeTypes().forName(mediaType.toString());
+                m.set(Metadata.CONTENT_TYPE, mimeType.toString());
+                extension = mimeType.getExtension();
+            } catch (MimeTypeException e) {
+                //swallow: fall back to a resource name without an extension
+            }
+            m.set(TikaMetadataKeys.RESOURCE_NAME_KEY,
+                    //just in case something screwy is going on with the column name
+                    FilenameUtils.normalize(FilenameUtils.getName(columnName + "_" + rowNum + extension)));
+
+            ex.parseEmbedded(is, handler, m, true);
+
+        } finally {
+            IOUtils.closeQuietly(is);
+        }
+        handler.endElement("", "span", "span");
+    }
+
+    /**
+     * Returns a stream over the BLOB in the given column of the current row.
+     * <p>
+     * If <code>blob</code> is <code>null</code> the BLOB is fetched from the
+     * result set.  Subclasses may override this for drivers that need a
+     * different way of reading binary columns.
+     *
+     * @param resultSet result set positioned on the current row
+     * @param columnIndex 1-based index of the column
+     * @param blob already fetched BLOB, or <code>null</code> to fetch it from the result set
+     * @param metadata metadata for the embedded document
+     * @return stream over the content, or <code>null</code> if the cell is SQL NULL
+     * @throws SQLException if the column cannot be read
+     */
+    protected InputStream getInputStreamFromBlob(ResultSet resultSet, int columnIndex, Blob blob, Metadata metadata)
+            throws SQLException {
+        Blob source = (blob != null) ? blob : resultSet.getBlob(columnIndex);
+        if (source == null) {
+            return null;
+        }
+        return TikaInputStream.get(source, metadata);
+    }
+
+    /** Writes a DATE cell using the driver's string form of the value. */
+    protected void handleDate(ResultSet resultSet, int columnIndex, ContentHandler handler)
+            throws SAXException, SQLException {
+        addAllCharacters(resultSet.getString(columnIndex), handler);
+    }
+
+    /** Writes a TIMESTAMP cell using the driver's string form of the value. */
+    protected void handleTimeStamp(ResultSet resultSet, int columnIndex, ContentHandler handler)
+            throws SAXException, SQLException {
+        addAllCharacters(resultSet.getString(columnIndex), handler);
+    }
+
+    /**
+     * Writes <code>s</code> to the handler as character data.  A <code>null</code>
+     * string, which is what <code>getString</code> returns for SQL NULL, writes nothing.
+     */
+    protected void addAllCharacters(String s, ContentHandler handler) throws SAXException {
+        if (s == null) {
+            return;
+        }
+        char[] chars = s.toCharArray();
+        handler.characters(chars, 0, chars.length);
+    }
+
+    /**
+     * (Re)runs the query over the table and resets the row counter.  A previously
+     * open result set is closed together with the statement that produced it.
+     *
+     * @throws IOException if the query fails; the SQLException is wrapped in {@link IOExceptionWithCause}
+     */
+    void reset() throws IOException {
+
+        if (results != null) {
+            closeWithStatement(results);
+            //make sure a failed query below is not mistaken for an open result set
+            results = null;
+        }
+
+        //NOTE(review): the table name is concatenated unquoted; a name with spaces or
+        //special characters will presumably make this query fail -- confirm with callers
+        String sql = "SELECT * from " + tableName;
+        Statement st = null;
+        try {
+            st = connection.createStatement();
+            results = st.executeQuery(sql);
+        } catch (SQLException e) {
+            if (st != null) {
+                try {
+                    st.close();
+                } catch (SQLException e2) {
+                    //swallow: the original failure is the one to report
+                }
+            }
+            throw new IOExceptionWithCause(e);
+        }
+        rows = 0;
+    }
+
+    /**
+     * Closes a result set and the statement that created it, ignoring failures.
+     */
+    private static void closeWithStatement(ResultSet rs) {
+        Statement owner = null;
+        try {
+            //must be asked before the result set is closed
+            owner = rs.getStatement();
+        } catch (SQLException e) {
+            //swallow
+        }
+        try {
+            rs.close();
+        } catch (SQLException e) {
+            //swallow
+        }
+        if (owner != null) {
+            try {
+                owner.close();
+            } catch (SQLException e) {
+                //swallow
+            }
+        }
+    }
+
+    /**
+     * @return name of the table this reader iterates over
+     */
+    public String getTableName() {
+        return tableName;
+    }
+
+
+    /**
+     * @return the config taken from the parse context, or the default config if none was supplied
+     */
+    protected TikaConfig getTikaConfig() {
+        if (tikaConfig == null) {
+            tikaConfig = TikaConfig.getDefaultConfig();
+        }
+        return tikaConfig;
+    }
+
+    /**
+     * @return detector of {@link #getTikaConfig()}, cached after the first call
+     */
+    protected Detector getDetector() {
+        if (detector == null) {
+            detector = getTikaConfig().getDetector();
+        }
+        return detector;
+    }
+
+    /**
+     * @return mime repository of {@link #getTikaConfig()}, cached after the first call
+     */
+    protected MimeTypes getMimeTypes() {
+        if (mimeTypes == null) {
+            mimeTypes = getTikaConfig().getMimeRepository();
+        }
+        return mimeTypes;
+    }
+
+}

Added: 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jdbc/SQLite3DBParser.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jdbc/SQLite3DBParser.java?rev=1659545&view=auto
==============================================================================
--- 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jdbc/SQLite3DBParser.java
 (added)
+++ 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jdbc/SQLite3DBParser.java
 Fri Feb 13 12:43:56 2015
@@ -0,0 +1,116 @@
+package org.apache.tika.parser.jdbc;
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.sql.Connection;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.sql.Statement;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.tika.io.IOExceptionWithCause;
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.ParseContext;
+import org.sqlite.SQLiteConfig;
+
+/**
+ * This is the implementation of the db parser for SQLite.
+ * <p>
+ * This parser is internal only; it should not be registered in the services
+ * file or configured in the TikaConfig xml file.
+ */
+class SQLite3DBParser extends AbstractDBParser {
+
+    protected static final String SQLITE_CLASS_NAME = "org.sqlite.JDBC";
+
+    /**
+     *
+     * @param context context
+     * @return null (always)
+     */
+    @Override
+    public Set<MediaType> getSupportedTypes(ParseContext context) {
+        return null;
+    }
+
+    /**
+     * Opens a connection to the SQLite database behind <code>stream</code>, with
+     * the read-only flag set in the {@link SQLiteConfig}.
+     *
+     * @param stream stream that contains the database
+     * @param metadata metadata passed on to {@link #getConnectionString}
+     * @param context parse context passed on to {@link #getConnectionString}
+     * @return open connection
+     * @throws IOException if the driver class is missing or the connection cannot be
+     *         opened; the underlying exception is kept as the cause
+     */
+    @Override
+    protected Connection getConnection(InputStream stream, Metadata metadata, ParseContext context) throws IOException {
+        String connectionString = getConnectionString(stream, metadata, context);
+
+        try {
+            Class.forName(getJDBCClassName());
+        } catch (ClassNotFoundException e) {
+            throw new IOExceptionWithCause(e);
+        }
+        try {
+            SQLiteConfig config = new SQLiteConfig();
+
+            //good habit, but effectively meaningless here
+            config.setReadOnly(true);
+            return config.createConnection(connectionString);
+
+        } catch (SQLException e) {
+            //wrap rather than copy the message, so the SQLException stays in the stack trace
+            throw new IOExceptionWithCause(e);
+        }
+    }
+
+    /**
+     * Builds the <code>jdbc:sqlite:</code> url for the file behind the stream.
+     * <p>
+     * NOTE(review): if <code>is</code> is not already file backed,
+     * <code>TikaInputStream.get(is).getFile()</code> presumably spools it to a
+     * temporary file; nothing in this class releases that file -- confirm who
+     * cleans it up.
+     *
+     * @param is stream that contains the database
+     * @param metadata unused
+     * @param context unused
+     * @return connection string pointing at the absolute path of the database file
+     * @throws IOException if the file cannot be obtained from the stream
+     */
+    @Override
+    protected String getConnectionString(InputStream is, Metadata metadata, ParseContext context) throws IOException {
+        File dbFile = TikaInputStream.get(is).getFile();
+        return "jdbc:sqlite:"+dbFile.getAbsolutePath();
+    }
+
+    @Override
+    protected String getJDBCClassName() {
+        return SQLITE_CLASS_NAME;
+    }
+
+    /**
+     * Lists the tables recorded in <code>sqlite_master</code>.
+     *
+     * @param connection connection to query
+     * @param metadata unused
+     * @param context unused
+     * @return table names in the order the query returns them
+     * @throws SQLException if the query fails
+     */
+    @Override
+    protected List<String> getTableNames(Connection connection, Metadata metadata,
+                                         ParseContext context) throws SQLException {
+        List<String> tableNames = new LinkedList<String>();
+
+        Statement st = null;
+        ResultSet rs = null;
+        try {
+            st = connection.createStatement();
+            String sql = "SELECT name FROM sqlite_master WHERE type='table'";
+            rs = st.executeQuery(sql);
+
+            while (rs.next()) {
+                tableNames.add(rs.getString(1));
+            }
+        } finally {
+            try {
+                if (rs != null) {
+                    rs.close();
+                }
+            } finally {
+                if (st != null) {
+                    st.close();
+                }
+            }
+        }
+        return tableNames;
+    }
+
+    @Override
+    public JDBCTableReader getTableReader(Connection connection, String tableName, ParseContext context) {
+        return new SQLite3TableReader(connection, tableName, context);
+    }
+}

Added: 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jdbc/SQLite3Parser.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jdbc/SQLite3Parser.java?rev=1659545&view=auto
==============================================================================
--- 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jdbc/SQLite3Parser.java
 (added)
+++ 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jdbc/SQLite3Parser.java
 Fri Feb 13 12:43:56 2015
@@ -0,0 +1,78 @@
+package org.apache.tika.parser.jdbc;
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Collections;
+import java.util.Set;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.AbstractParser;
+import org.apache.tika.parser.ParseContext;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+/**
+ * This is the main class for parsing SQLite3 files.  When {@link #parse} is called,
+ * this creates a new {@link org.apache.tika.parser.jdbc.SQLite3DBParser}.
+ * <p>
+ * Given potential conflicts of native libraries in web servers, users will
+ * need to add org.xerial's sqlite-jdbc jar to the class path for this parser
+ * to work.  For development and testing, this jar is specified in tika-parsers'
+ * pom.xml, but it is currently set to "provided."
+ * <p>
+ * Note that this family of jdbc parsers is designed to treat each CLOB and each BLOB
+ * as embedded documents.
+ *
+ */
+public class SQLite3Parser extends AbstractParser {
+    /** Serial version UID */
+    private static final long serialVersionUID = -752276948656079347L;
+
+    private static final MediaType MEDIA_TYPE = MediaType.application("x-sqlite3");
+
+    //decided once per instance in the constructor: the sqlite media type if the
+    //JDBC driver is on the class path, otherwise empty
+    private final Set<MediaType> SUPPORTED_TYPES;
+
+    /**
+     * Checks to see if class is available for org.sqlite.JDBC.
+     * <p>
+     * If not, this class will return an empty set for getSupportedTypes()
+     */
+    public SQLite3Parser() {
+        Set<MediaType> tmp;
+        try {
+            Class.forName(SQLite3DBParser.SQLITE_CLASS_NAME);
+            tmp = Collections.singleton(MEDIA_TYPE);
+        } catch (ClassNotFoundException e) {
+            //driver not available: advertise nothing; typed call avoids the raw EMPTY_SET
+            tmp = Collections.<MediaType>emptySet();
+        }
+        SUPPORTED_TYPES = tmp;
+    }
+
+    /**
+     * @param context parse context (unused)
+     * @return the sqlite3 media type if the JDBC driver was found when this
+     *         parser was constructed, otherwise an empty set
+     */
+    @Override
+    public Set<MediaType> getSupportedTypes(ParseContext context) {
+        return SUPPORTED_TYPES;
+    }
+
+    /**
+     * Delegates to a fresh {@link SQLite3DBParser}, so no database state is
+     * kept on this instance between calls.
+     */
+    @Override
+    public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context)
+            throws IOException, SAXException, TikaException {
+        SQLite3DBParser p = new SQLite3DBParser();
+        p.parse(stream, handler, metadata, context);
+    }
+}

Added: 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jdbc/SQLite3TableReader.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jdbc/SQLite3TableReader.java?rev=1659545&view=auto
==============================================================================
--- 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jdbc/SQLite3TableReader.java
 (added)
+++ 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jdbc/SQLite3TableReader.java
 Fri Feb 13 12:43:56 2015
@@ -0,0 +1,110 @@
+package org.apache.tika.parser.jdbc;
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.sql.Blob;
+import java.sql.Connection;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.text.DateFormat;
+import java.text.SimpleDateFormat;
+import java.util.Locale;
+
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.ParseContext;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+
+
+/**
+ * Concrete class for SQLite table parsing.  This overrides the
+ * column type handling inherited from {@link JDBCTableReader}.
+ * <p>
+ * This class is not designed to be thread safe (because of DateFormat)!
+ * Need to call a new instance for each parse, as AbstractDBParser does.
+ * <p>
+ * For now, this silently skips cells of type CLOB, because xerial's jdbc connector
+ * does not currently support them.
+ */
+class SQLite3TableReader extends JDBCTableReader {
+
+
+    /**
+     * Renders TIMESTAMP cells as {@code yyyy-MM-dd}.  No time zone is set on the
+     * format, so {@link SimpleDateFormat} falls back to the JVM default time zone.
+     */
+    DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd", Locale.ROOT);
+
+    /**
+     * @param connection connection handed to the superclass
+     * @param tableName  name of the table handed to the superclass
+     * @param context    parse context handed to the superclass
+     */
+    public SQLite3TableReader(Connection connection, String tableName, ParseContext context) {
+        super(connection, tableName, context);
+    }
+
+
+    /**
+     * No-op for now in {@link SQLite3TableReader}: CLOB cells are silently skipped.
+     *
+     * @param tableName   ignored
+     * @param fieldName   ignored
+     * @param rowNum      ignored
+     * @param resultSet   ignored
+     * @param columnIndex ignored
+     * @param handler     ignored
+     * @param context     ignored
+     * @throws java.sql.SQLException    never thrown by this implementation
+     * @throws java.io.IOException      never thrown by this implementation
+     * @throws org.xml.sax.SAXException never thrown by this implementation
+     */
+    @Override
+    protected void handleClob(String tableName, String fieldName, int rowNum,
+                              ResultSet resultSet, int columnIndex,
+                              ContentHandler handler, ParseContext context)
+            throws SQLException, IOException, SAXException {
+        //no-op for now.
+    }
+
+    /**
+     * The jdbc connection to Sqlite does not yet implement blob, have to getBytes().
+     *
+     * @param resultSet   resultSet
+     * @param columnIndex columnIndex for blob
+     * @param blob        not used by this implementation
+     * @param m           metadata passed along to {@link TikaInputStream#get}
+     * @return a stream over the bytes that {@code resultSet.getBytes(columnIndex)} returns
+     * @throws java.sql.SQLException if the bytes cannot be read from the result set
+     */
+    @Override
+    protected InputStream getInputStreamFromBlob(ResultSet resultSet, int columnIndex, Blob blob, Metadata m)
+            throws SQLException {
+        return TikaInputStream.get(resultSet.getBytes(columnIndex), m);
+    }
+
+    /**
+     * Writes an integer-typed cell to the handler.  Cells whose column type name is
+     * {@code TIMESTAMP} are rendered as a date; everything else is written as an int.
+     *
+     * @param columnTypeName type name of the column as reported by the caller
+     * @param rs             result set positioned on the current row
+     * @param columnIndex    index of the cell's column
+     * @param handler        handler that receives the cell text
+     * @throws SQLException if the cell cannot be read
+     * @throws SAXException if the handler rejects the characters
+     */
+    @Override
+    protected void handleInteger(String columnTypeName, ResultSet rs, int columnIndex,
+                                 ContentHandler handler) throws SQLException, SAXException {
+        //As of this writing, with xerial's sqlite jdbc connector, a timestamp is
+        //stored as a column of type Integer, but the columnTypeName is TIMESTAMP, and the
+        //value is a string representing a Long.
+        //Constant-first equals keeps a null type name on the plain-integer path.
+        if ("TIMESTAMP".equals(columnTypeName)) {
+            String raw = rs.getString(columnIndex);
+            //getString returns null for SQL NULL; write nothing for such a cell
+            //instead of letting Long.parseLong(null) abort the whole parse
+            if (raw != null) {
+                addAllCharacters(parseDateFromLongString(raw), handler);
+            }
+        } else {
+            addAllCharacters(Integer.toString(rs.getInt(columnIndex)), handler);
+        }
+
+    }
+
+    /**
+     * Interprets {@code longString} as milliseconds since the epoch and formats it
+     * with {@link #dateFormat}.
+     *
+     * @param longString non-null cell value, expected to be a decimal long
+     * @return the formatted date, or {@code longString} unchanged if it is not a
+     *         parseable long (so the cell content is kept rather than failing the parse)
+     */
+    private String parseDateFromLongString(String longString) {
+        try {
+            java.sql.Date d = new java.sql.Date(Long.parseLong(longString));
+            return dateFormat.format(d);
+        } catch (NumberFormatException e) {
+            //not an epoch value after all; fall back to the stored text
+            return longString;
+        }
+    }
+}

Added: 
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/jdbc/SQLite3ParserTest.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/jdbc/SQLite3ParserTest.java?rev=1659545&view=auto
==============================================================================
--- 
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/jdbc/SQLite3ParserTest.java
 (added)
+++ 
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/jdbc/SQLite3ParserTest.java
 Fri Feb 13 12:43:56 2015
@@ -0,0 +1,360 @@
+package org.apache.tika.parser.jdbc;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.tika.TikaTest;
+import org.apache.tika.extractor.EmbeddedResourceHandler;
+import org.apache.tika.extractor.ParserContainerExtractor;
+import org.apache.tika.io.IOUtils;
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.Database;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.AutoDetectParser;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.parser.RecursiveParserWrapper;
+import org.apache.tika.sax.BasicContentHandlerFactory;
+import org.apache.tika.sax.BodyContentHandler;
+import org.apache.tika.sax.ToXMLContentHandler;
+import org.junit.Test;
+import org.xml.sax.ContentHandler;
+
+/**
+ * Tests parsing of the sqlite3 test database {@code testSqlite3b.db} through
+ * {@link AutoDetectParser}, {@link RecursiveParserWrapper} and
+ * {@link ParserContainerExtractor}.
+ */
+public class SQLite3ParserTest extends TikaTest {
+    private final static String TEST_FILE_NAME = "testSqlite3b.db";
+    private final static String TEST_FILE1 = "/test-documents/"+TEST_FILE_NAME;
+
+    @Test
+    public void testBasic() throws Exception {
+        Parser p = new AutoDetectParser();
+
+        //test different types of input streams
+        //actual inputstream, memory buffered bytearray and literal file
+        InputStream[] streams = new InputStream[3];
+        streams[0] = getResourceAsStream(TEST_FILE1);
+        ByteArrayOutputStream bos = new ByteArrayOutputStream();
+        //this copy source is only needed to fill the byte array; close it right away
+        InputStream copySource = getResourceAsStream(TEST_FILE1);
+        try {
+            IOUtils.copy(copySource, bos);
+        } finally {
+            copySource.close();
+        }
+        streams[1] = new ByteArrayInputStream(bos.toByteArray());
+        streams[2] = TikaInputStream.get(getResourceAsFile(TEST_FILE1));
+        int tests = 0;
+        for (InputStream stream : streams) {
+            Metadata metadata = new Metadata();
+            metadata.set(Metadata.RESOURCE_NAME_KEY, TEST_FILE_NAME);
+            //1) getXML closes the stream
+            //2) getXML runs recursively on the contents, so the embedded docs should show up
+            XMLResult result = getXML(stream, p, metadata);
+            String x = result.xml;
+            //first table name
+            assertContains("<table name=\"my_table1\"><thead><tr>\t<th>INT_COL</th>", x);
+            //non-ascii
+            assertContains("<td>普林斯顿大学</td>", x);
+            //boolean
+            assertContains("<td>true</td>\t<td>2015-01-02</td>", x);
+            //date test
+            assertContains("2015-01-04", x);
+            //timestamp test
+            assertContains("2015-01-03 15:17:03", x);
+            //first embedded doc's image tag
+            assertContains("alt=\"image1.png\"", x);
+            //second embedded doc's image tag
+            assertContains("alt=\"A description...\"", x);
+            //second table name
+            assertContains("<table name=\"my_table2\"><thead><tr>\t<th>INT_COL2</th>", x);
+
+            Metadata post = result.metadata;
+            String[] tableNames = post.getValues(Database.TABLE_NAME);
+            assertEquals(2, tableNames.length);
+            assertEquals("my_table1", tableNames[0]);
+            assertEquals("my_table2", tableNames[1]);
+            tests++;
+        }
+        //guards against the loop silently running over fewer stream types
+        assertEquals(3, tests);
+    }
+
+    //make sure that table cells and rows are properly marked to
+    //yield \t and \n at the appropriate places
+    @Test
+    public void testSpacesInBodyContentHandler()  throws Exception {
+        Parser p = new AutoDetectParser();
+        InputStream stream = null;
+        Metadata metadata = new Metadata();
+        metadata.set(Metadata.RESOURCE_NAME_KEY, TEST_FILE_NAME);
+        ContentHandler handler = new BodyContentHandler(-1);
+        ParseContext ctx = new ParseContext();
+        ctx.set(Parser.class, p);
+        try {
+            stream = getResourceAsStream(TEST_FILE1);
+            p.parse(stream, handler, metadata, ctx);
+        } finally {
+            //stream is still null if opening the resource failed; closing it
+            //unguarded would replace the real failure with an NPE
+            if (stream != null) {
+                stream.close();
+            }
+        }
+        String s = handler.toString();
+        assertContains("0\t2.3\t2.4\tlorem", s);
+        assertContains("tempor\n", s);
+    }
+
+    //test what happens if the user forgets to pass in a parser via context
+    //to handle embedded documents
+    @Test
+    public void testNotAddingEmbeddedParserToParseContext() throws Exception {
+        Parser p = new AutoDetectParser();
+
+        InputStream is = getResourceAsStream(TEST_FILE1);
+        Metadata metadata = new Metadata();
+        metadata.set(Metadata.RESOURCE_NAME_KEY, TEST_FILE_NAME);
+        ContentHandler handler = new ToXMLContentHandler();
+        try {
+            p.parse(is, handler, metadata, new ParseContext());
+        } finally {
+            is.close();
+        }
+        String xml = handler.toString();
+        //just includes headers for embedded documents
+        assertContains("<table name=\"my_table1\"><thead><tr>", xml);
+        assertContains("<td><span type=\"blob\" column_name=\"BYTES_COL\" row_number=\"0\"><div class=\"package-entry\"><h1>BYTES_COL_0.doc</h1>", xml);
+        //but no other content
+        assertNotContained("dog", xml);
+        assertNotContained("alt=\"image1.png\"", xml);
+        //second embedded doc's image tag
+        assertNotContained("alt=\"A description...\"", xml);
+    }
+
+    @Test
+    public void testRecursiveParserWrapper() throws Exception {
+        Parser p = new AutoDetectParser();
+
+        RecursiveParserWrapper wrapper =
+                new RecursiveParserWrapper(p, new BasicContentHandlerFactory(
+                        BasicContentHandlerFactory.HANDLER_TYPE.BODY, -1));
+        InputStream is = getResourceAsStream(TEST_FILE1);
+        Metadata metadata = new Metadata();
+        metadata.set(Metadata.RESOURCE_NAME_KEY, TEST_FILE_NAME);
+        try {
+            wrapper.parse(is, new BodyContentHandler(-1), metadata, new ParseContext());
+        } finally {
+            is.close();
+        }
+        List<Metadata> metadataList = wrapper.getMetadata();
+        assertEquals(5, metadataList.size());
+
+        //both the \t and the \n checks below look at the content of the first entry
+        String containerContent = metadataList.get(0).get(RecursiveParserWrapper.TIKA_CONTENT);
+
+        //make sure the \t are inserted in a body handler
+        assertContains("0\t2.3\t2.4\tlorem", containerContent);
+        assertContains("普林斯顿大学", containerContent);
+
+        //make sure the \n is inserted
+        assertContains("do eiusmod tempor\n", containerContent);
+
+        assertContains("The quick brown fox", metadataList.get(2).get(RecursiveParserWrapper.TIKA_CONTENT));
+        assertContains("The quick brown fox", metadataList.get(4).get(RecursiveParserWrapper.TIKA_CONTENT));
+
+        //confirm .doc was added to blob
+        assertEquals("testSqlite3b.db/BYTES_COL_0.doc/image1.png", metadataList.get(1).get(RecursiveParserWrapper.EMBEDDED_RESOURCE_PATH));
+    }
+
+    @Test
+    public void testParserContainerExtractor() throws Exception {
+        //There should be 4 embedded documents:
+        //2x word files, one doc and one docx
+        //2x png files, the same image embedded in each of the doc and docx
+
+        ParserContainerExtractor ex = new ParserContainerExtractor();
+        ByteCopyingHandler byteCopier = new ByteCopyingHandler();
+        TikaInputStream tis = TikaInputStream.get(getResourceAsStream(TEST_FILE1));
+        try {
+            ex.extract(tis, ex, byteCopier);
+        } finally {
+            tis.close();
+        }
+
+        assertEquals(4, byteCopier.bytes.size());
+        //index 0 is deliberately left null: that entry is compared byte by byte
+        //against the OLE header below rather than as text
+        String[] strings = new String[4];
+        for (int i = 1; i < byteCopier.bytes.size(); i++) {
+            byte[] byteArr = byteCopier.bytes.get(i);
+            String s = new String(byteArr, 0, Math.min(byteArr.length,1000), "UTF-8");
+            strings[i] = s;
+        }
+        //D0 CF 11 E0 A1 B1 1A E1 00 00 as signed bytes
+        byte[] oleBytes = new byte[]{
+                (byte)-48,
+                (byte)-49,
+                (byte)17,
+                (byte)-32,
+                (byte)-95,
+                (byte)-79,
+                (byte)26,
+                (byte)-31,
+                (byte)0,
+                (byte)0,
+        };
+        //test OLE
+        for (int i = 0; i < oleBytes.length; i++) {
+            assertEquals(oleBytes[i], byteCopier.bytes.get(0)[i]);
+        }
+        assertContains("PNG", strings[1]);
+        assertContains("PK", strings[2]);
+        assertContains("PNG", strings[3]);
+    }
+
+    //This confirms that reading the stream twice is not
+    //quadrupling the number of attachments.
+    @Test
+    public void testInputStreamReset() throws Exception {
+        //There should be 8 embedded documents:
+        //4x word files, two docs and two docxs
+        //4x png files, the same image embedded in each of the doc and docx
+
+        ParserContainerExtractor ex = new ParserContainerExtractor();
+        InputStreamResettingHandler byteCopier = new InputStreamResettingHandler();
+        InputStream is = getResourceAsStream(TEST_FILE1);
+        try {
+            ex.extract(TikaInputStream.get(is), ex, byteCopier);
+            is.reset();
+            assertEquals(8, byteCopier.bytes.size());
+        } finally {
+            is.close();
+        }
+    }
+
+
+
+    /**
+     * Handler that copies every embedded stream twice, resetting the stream
+     * in between, and records both copies in {@link #bytes}.
+     */
+    public static class InputStreamResettingHandler implements EmbeddedResourceHandler {
+
+        /** Two entries per embedded resource that was copied successfully. */
+        public List<byte[]> bytes = new ArrayList<byte[]>();
+
+        @Override
+        public void handle(String filename, MediaType mediaType,
+                           InputStream stream) {
+            ByteArrayOutputStream os = new ByteArrayOutputStream();
+            //mark/reset is required below, so wrap streams that cannot do it
+            if (! stream.markSupported()) {
+                stream = TikaInputStream.get(stream);
+            }
+            stream.mark(1000000);
+            try {
+                IOUtils.copy(stream, os);
+                bytes.add(os.toByteArray());
+                stream.reset();
+                //now try again
+                os.reset();
+                IOUtils.copy(stream, os);
+                bytes.add(os.toByteArray());
+                stream.reset();
+            } catch (IOException e) {
+                //swallow: a copy that fails adds fewer entries to bytes,
+                //which the size assertion in the calling test picks up
+            }
+        }
+    }
+
+    //code used for creating the test file
+/*
+    private Connection getConnection(String dbFileName) throws Exception {
+        File testDirectory = new File(this.getClass().getResource("/test-documents").toURI());
+        System.out.println("Writing to: " + testDirectory.getAbsolutePath());
+        File testDB = new File(testDirectory, dbFileName);
+        Connection c = null;
+        try {
+            Class.forName("org.sqlite.JDBC");
+            c = DriverManager.getConnection("jdbc:sqlite:" + testDB.getAbsolutePath());
+        } catch ( Exception e ) {
+            System.err.println( e.getClass().getName() + ": " + e.getMessage() );
+            System.exit(0);
+        }
+        return c;
+    }
+
+    @Test
+    public void testCreateDB() throws Exception {
+        Connection c = getConnection("testSQLLite3b.db");
+        Statement st = c.createStatement();
+        String sql = "DROP TABLE if exists my_table1";
+        st.execute(sql);
+        sql = "CREATE TABLE my_table1 (" +
+                "INT_COL INT PRIMARY KEY, "+
+                "FLOAT_COL FLOAT, " +
+                "DOUBLE_COL DOUBLE, " +
+                "CHAR_COL CHAR(30), "+
+                "VARCHAR_COL VARCHAR(30), "+
+                "BOOLEAN_COL BOOLEAN,"+
+                "DATE_COL DATE,"+
+                "TIME_STAMP_COL TIMESTAMP,"+
+                "BYTES_COL BYTES" +
+        ")";
+        st.execute(sql);
+        sql = "insert into my_table1 (INT_COL, FLOAT_COL, DOUBLE_COL, CHAR_COL, " +
+                "VARCHAR_COL, BOOLEAN_COL, DATE_COL, TIME_STAMP_COL, BYTES_COL) " +
+                "values (?,?,?,?,?,?,?,?,?)";
+        SimpleDateFormat f = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+        java.util.Date d = f.parse("2015-01-03 15:17:03");
+        System.out.println(d.getTime());
+        long d1Long = 1420229823000L;// 2015-01-02 15:17:03
+        long d2Long = 1420316223000L;// 2015-01-03 15:17:03
+        PreparedStatement ps = c.prepareStatement(sql);
+        ps.setInt(1, 0);
+        ps.setFloat(2, 2.3f);
+        ps.setDouble(3, 2.4d);
+        ps.setString(4, "lorem");
+        ps.setString(5, "普林斯顿大学");
+        ps.setBoolean(6, true);
+        ps.setString(7, "2015-01-02");
+        ps.setString(8, "2015-01-03 15:17:03");
+//        ps.setClob(9, new StringReader(clobString));
+        ps.setBytes(9, getByteArray(this.getClass().getResourceAsStream("/test-documents/testWORD_1img.doc")));//contains "quick brown fox"
+        ps.executeUpdate();
+        ps.clearParameters();
+
+        ps.setInt(1, 1);
+        ps.setFloat(2, 4.6f);
+        ps.setDouble(3, 4.8d);
+        ps.setString(4, "dolor");
+        ps.setString(5, "sit");
+        ps.setBoolean(6, false);
+        ps.setString(7, "2015-01-04");
+        ps.setString(8, "2015-01-03 15:17:03");
+        //ps.setClob(9, new StringReader("consectetur adipiscing elit"));
+        ps.setBytes(9, getByteArray(this.getClass().getResourceAsStream("/test-documents/testWORD_1img.docx")));//contains "The end!"
+
+        ps.executeUpdate();
+
+        //build table2
+        sql = "DROP TABLE if exists my_table2";
+        st.execute(sql);
+
+        sql = "CREATE TABLE my_table2 (" +
+                "INT_COL2 INT PRIMARY KEY, "+
+                "VARCHAR_COL2 VARCHAR(64))";
+        st.execute(sql);
+        sql = "INSERT INTO my_table2 values(0,'sed, do eiusmod tempor')";
+        st.execute(sql);
+        sql = "INSERT INTO my_table2 values(1,'incididunt \nut labore')";
+        st.execute(sql);
+
+        c.close();
+    }
+
+    private byte[] getByteArray(InputStream is) throws IOException {
+        ByteArrayOutputStream bos = new ByteArrayOutputStream();
+        byte[] buff = new byte[1024];
+        for (int bytesRead; (bytesRead = is.read(buff)) != -1;) {
+            bos.write(buff, 0, bytesRead);
+        }
+        return bos.toByteArray();
+    }
+
+*/
+
+
+}

svn commit: r1659545 - in /tika/trunk/tika-parsers/src: main/java/org/apache/tika/parser/jdbc/ test/java/org/apache/tika/parser/jdbc/

Reply via email to